intel_engine_cs.c source code [linux/drivers/gpu/drm/i915/gt/intel_engine_cs.c]

1	// SPDX-License-Identifier: MIT
2	/*
3	* Copyright © 2016 Intel Corporation
4	*/
5
6	#include <linux/string_helpers.h>
7
8	#include <drm/drm_print.h>
9
10	#include "gem/i915_gem_context.h"
11	#include "gem/i915_gem_internal.h"
12	#include "gt/intel_gt_print.h"
13	#include "gt/intel_gt_regs.h"
14
15	#include "i915_cmd_parser.h"
16	#include "i915_drv.h"
17	#include "i915_irq.h"
18	#include "i915_reg.h"
19	#include "intel_breadcrumbs.h"
20	#include "intel_context.h"
21	#include "intel_engine.h"
22	#include "intel_engine_pm.h"
23	#include "intel_engine_regs.h"
24	#include "intel_engine_user.h"
25	#include "intel_execlists_submission.h"
26	#include "intel_gt.h"
27	#include "intel_gt_mcr.h"
28	#include "intel_gt_pm.h"
29	#include "intel_gt_requests.h"
30	#include "intel_lrc.h"
31	#include "intel_lrc_reg.h"
32	#include "intel_reset.h"
33	#include "intel_ring.h"
34	#include "uc/intel_guc_submission.h"
35
/*
 * Haswell does have the CXT_SIZE register, however it does not appear to be
 * valid. The docs describe, in dwords, what the context object contains: the
 * full size is 70720 bytes, but the power context and the execlist context
 * are never saved (the power context is stored elsewhere, and execlists don't
 * work on HSW). The final size, including the extra state required for the
 * Resource Streamer, is therefore 66944 bytes, which rounds to 17 pages.
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

/* Logical ring context image sizes returned for render/compute engines */
#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

/* Logical ring context image size returned for all other engine classes */
#define GEN8_LR_CONTEXT_OTHER_SIZE	(2 * PAGE_SIZE)
51
52	#define MAX_MMIO_BASES 3
53	struct engine_info {
54	u8 class;
55	u8 instance;
56	/ mmio bases table must be sorted in reverse graphics_ver order /
57	struct engine_mmio_base {
58	u32 graphics_ver : `8`;
59	u32 base : `24`;
60	} mmio_bases[MAX_MMIO_BASES];
61	};
62
63	static const struct engine_info intel_engines[] = {
64	[RCS0] = {
65	.class = RENDER_CLASS,
66	.instance = `0`,
67	.mmio_bases = {
68	{ .graphics_ver = `1`, .base = RENDER_RING_BASE }
69	},
70	},
71	[BCS0] = {
72	.class = COPY_ENGINE_CLASS,
73	.instance = `0`,
74	.mmio_bases = {
75	{ .graphics_ver = `6`, .base = BLT_RING_BASE }
76	},
77	},
78	[BCS1] = {
79	.class = COPY_ENGINE_CLASS,
80	.instance = `1`,
81	.mmio_bases = {
82	{ .graphics_ver = `12`, .base = XEHPC_BCS1_RING_BASE }
83	},
84	},
85	[BCS2] = {
86	.class = COPY_ENGINE_CLASS,
87	.instance = `2`,
88	.mmio_bases = {
89	{ .graphics_ver = `12`, .base = XEHPC_BCS2_RING_BASE }
90	},
91	},
92	[BCS3] = {
93	.class = COPY_ENGINE_CLASS,
94	.instance = `3`,
95	.mmio_bases = {
96	{ .graphics_ver = `12`, .base = XEHPC_BCS3_RING_BASE }
97	},
98	},
99	[BCS4] = {
100	.class = COPY_ENGINE_CLASS,
101	.instance = `4`,
102	.mmio_bases = {
103	{ .graphics_ver = `12`, .base = XEHPC_BCS4_RING_BASE }
104	},
105	},
106	[BCS5] = {
107	.class = COPY_ENGINE_CLASS,
108	.instance = `5`,
109	.mmio_bases = {
110	{ .graphics_ver = `12`, .base = XEHPC_BCS5_RING_BASE }
111	},
112	},
113	[BCS6] = {
114	.class = COPY_ENGINE_CLASS,
115	.instance = `6`,
116	.mmio_bases = {
117	{ .graphics_ver = `12`, .base = XEHPC_BCS6_RING_BASE }
118	},
119	},
120	[BCS7] = {
121	.class = COPY_ENGINE_CLASS,
122	.instance = `7`,
123	.mmio_bases = {
124	{ .graphics_ver = `12`, .base = XEHPC_BCS7_RING_BASE }
125	},
126	},
127	[BCS8] = {
128	.class = COPY_ENGINE_CLASS,
129	.instance = `8`,
130	.mmio_bases = {
131	{ .graphics_ver = `12`, .base = XEHPC_BCS8_RING_BASE }
132	},
133	},
134	[VCS0] = {
135	.class = VIDEO_DECODE_CLASS,
136	.instance = `0`,
137	.mmio_bases = {
138	{ .graphics_ver = `11`, .base = GEN11_BSD_RING_BASE },
139	{ .graphics_ver = `6`, .base = GEN6_BSD_RING_BASE },
140	{ .graphics_ver = `4`, .base = BSD_RING_BASE }
141	},
142	},
143	[VCS1] = {
144	.class = VIDEO_DECODE_CLASS,
145	.instance = `1`,
146	.mmio_bases = {
147	{ .graphics_ver = `11`, .base = GEN11_BSD2_RING_BASE },
148	{ .graphics_ver = `8`, .base = GEN8_BSD2_RING_BASE }
149	},
150	},
151	[VCS2] = {
152	.class = VIDEO_DECODE_CLASS,
153	.instance = `2`,
154	.mmio_bases = {
155	{ .graphics_ver = `11`, .base = GEN11_BSD3_RING_BASE }
156	},
157	},
158	[VCS3] = {
159	.class = VIDEO_DECODE_CLASS,
160	.instance = `3`,
161	.mmio_bases = {
162	{ .graphics_ver = `11`, .base = GEN11_BSD4_RING_BASE }
163	},
164	},
165	[VCS4] = {
166	.class = VIDEO_DECODE_CLASS,
167	.instance = `4`,
168	.mmio_bases = {
169	{ .graphics_ver = `12`, .base = XEHP_BSD5_RING_BASE }
170	},
171	},
172	[VCS5] = {
173	.class = VIDEO_DECODE_CLASS,
174	.instance = `5`,
175	.mmio_bases = {
176	{ .graphics_ver = `12`, .base = XEHP_BSD6_RING_BASE }
177	},
178	},
179	[VCS6] = {
180	.class = VIDEO_DECODE_CLASS,
181	.instance = `6`,
182	.mmio_bases = {
183	{ .graphics_ver = `12`, .base = XEHP_BSD7_RING_BASE }
184	},
185	},
186	[VCS7] = {
187	.class = VIDEO_DECODE_CLASS,
188	.instance = `7`,
189	.mmio_bases = {
190	{ .graphics_ver = `12`, .base = XEHP_BSD8_RING_BASE }
191	},
192	},
193	[VECS0] = {
194	.class = VIDEO_ENHANCEMENT_CLASS,
195	.instance = `0`,
196	.mmio_bases = {
197	{ .graphics_ver = `11`, .base = GEN11_VEBOX_RING_BASE },
198	{ .graphics_ver = `7`, .base = VEBOX_RING_BASE }
199	},
200	},
201	[VECS1] = {
202	.class = VIDEO_ENHANCEMENT_CLASS,
203	.instance = `1`,
204	.mmio_bases = {
205	{ .graphics_ver = `11`, .base = GEN11_VEBOX2_RING_BASE }
206	},
207	},
208	[VECS2] = {
209	.class = VIDEO_ENHANCEMENT_CLASS,
210	.instance = `2`,
211	.mmio_bases = {
212	{ .graphics_ver = `12`, .base = XEHP_VEBOX3_RING_BASE }
213	},
214	},
215	[VECS3] = {
216	.class = VIDEO_ENHANCEMENT_CLASS,
217	.instance = `3`,
218	.mmio_bases = {
219	{ .graphics_ver = `12`, .base = XEHP_VEBOX4_RING_BASE }
220	},
221	},
222	[CCS0] = {
223	.class = COMPUTE_CLASS,
224	.instance = `0`,
225	.mmio_bases = {
226	{ .graphics_ver = `12`, .base = GEN12_COMPUTE0_RING_BASE }
227	}
228	},
229	[CCS1] = {
230	.class = COMPUTE_CLASS,
231	.instance = `1`,
232	.mmio_bases = {
233	{ .graphics_ver = `12`, .base = GEN12_COMPUTE1_RING_BASE }
234	}
235	},
236	[CCS2] = {
237	.class = COMPUTE_CLASS,
238	.instance = `2`,
239	.mmio_bases = {
240	{ .graphics_ver = `12`, .base = GEN12_COMPUTE2_RING_BASE }
241	}
242	},
243	[CCS3] = {
244	.class = COMPUTE_CLASS,
245	.instance = `3`,
246	.mmio_bases = {
247	{ .graphics_ver = `12`, .base = GEN12_COMPUTE3_RING_BASE }
248	}
249	},
250	[GSC0] = {
251	.class = OTHER_CLASS,
252	.instance = OTHER_GSC_INSTANCE,
253	.mmio_bases = {
254	{ .graphics_ver = `12`, .base = MTL_GSC_RING_BASE }
255	}
256	},
257	};
258
259	/**
260	* intel_engine_context_size() - return the size of the context for an engine
261	* @gt: the gt
262	* @class: engine class
263	*
264	* Each engine class may require a different amount of space for a context
265	* image.
266	*
267	* Return: size (in bytes) of an engine class specific context image
268	*
269	* Note: this size includes the HWSP, which is part of the context image
270	* in LRC mode, but does not include the "shared data page" used with
271	* GuC submission. The caller should account for this if using the GuC.
272	*/
273	u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
274	{
275	struct intel_uncore *uncore = gt->uncore;
276	u32 cxt_size;
277
278	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);
279
280	switch (class) {
281	case COMPUTE_CLASS:
282	fallthrough;
283	case RENDER_CLASS:
284	switch (GRAPHICS_VER(gt->i915)) {
285	default:
286	MISSING_CASE(GRAPHICS_VER(gt->i915));
287	return DEFAULT_LR_CONTEXT_RENDER_SIZE;
288	case `12`:
289	case `11`:
290	return GEN11_LR_CONTEXT_RENDER_SIZE;
291	case `9`:
292	return GEN9_LR_CONTEXT_RENDER_SIZE;
293	case `8`:
294	return GEN8_LR_CONTEXT_RENDER_SIZE;
295	case `7`:
296	if (IS_HASWELL(gt->i915))
297	return HSW_CXT_TOTAL_SIZE;
298
299	cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
300	return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * `64`,
301	PAGE_SIZE);
302	case `6`:
303	cxt_size = intel_uncore_read(uncore, CXT_SIZE);
304	return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * `64`,
305	PAGE_SIZE);
306	case `5`:
307	case `4`:
308	/*
309	* There is a discrepancy here between the size reported
310	* by the register and the size of the context layout
311	* in the docs. Both are described as authoritative!
312	*
313	* The discrepancy is on the order of a few cachelines,
314	* but the total is under one page (4k), which is our
315	* minimum allocation anyway so it should all come
316	* out in the wash.
317	*/
318	cxt_size = intel_uncore_read(uncore, CXT_SIZE) + `1`;
319	gt_dbg(gt, "graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
320	GRAPHICS_VER(gt->i915), cxt_size * `64`,
321	cxt_size - `1`);
322	return round_up(cxt_size * `64`, PAGE_SIZE);
323	case `3`:
324	case `2`:
325	/ For the special day when i810 gets merged. /
326	case `1`:
327	return `0`;
328	}
329	break;
330	default:
331	MISSING_CASE(class);
332	fallthrough;
333	case VIDEO_DECODE_CLASS:
334	case VIDEO_ENHANCEMENT_CLASS:
335	case COPY_ENGINE_CLASS:
336	case OTHER_CLASS:
337	if (GRAPHICS_VER(gt->i915) < `8`)
338	return `0`;
339	return GEN8_LR_CONTEXT_OTHER_SIZE;
340	}
341	}
342
343	static u32 __engine_mmio_base(struct drm_i915_private *i915,
344	const struct engine_mmio_base *bases)
345	{
346	int i;
347
348	for (i = `0`; i < MAX_MMIO_BASES; i++)
349	if (GRAPHICS_VER(i915) >= bases[i].graphics_ver)
350	break;
351
352	GEM_BUG_ON(i == MAX_MMIO_BASES);
353	GEM_BUG_ON(!bases[i].base);
354
355	return bases[i].base;
356	}
357
358	static void __sprint_engine_name(struct intel_engine_cs *engine)
359	{
360	/*
361	* Before we know what the uABI name for this engine will be,
362	* we still would like to keep track of this engine in the debug logs.
363	* We throw in a ' here as a reminder that this isn't its final name.
364	*/
365	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
366	intel_engine_class_repr(engine->class),
367	engine->instance) >= sizeof(engine->name));
368	}
369
370	void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
371	{
372	/*
373	* Though they added more rings on g4x/ilk, they did not add
374	* per-engine HWSTAM until gen6.
375	*/
376	if (GRAPHICS_VER(engine->i915) < `6` && engine->class != RENDER_CLASS)
377	return;
378
379	if (GRAPHICS_VER(engine->i915) >= `3`)
380	ENGINE_WRITE(engine, RING_HWSTAM, mask);
381	else
382	ENGINE_WRITE16(engine, RING_HWSTAM, mask);
383	}
384
/* Scrub engine MMIO state inherited from whoever ran before us. */
static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* The HWSP location is unknown at this point: mask off all writes. */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}
390
391	static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
392	{
393	GEM_DEBUG_WARN_ON(iir);
394	}
395
396	static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
397	{
398	u32 reset_domain;
399
400	if (ver >= `11`) {
401	static const u32 engine_reset_domains[] = {
402	[RCS0] = GEN11_GRDOM_RENDER,
403	[BCS0] = GEN11_GRDOM_BLT,
404	[BCS1] = XEHPC_GRDOM_BLT1,
405	[BCS2] = XEHPC_GRDOM_BLT2,
406	[BCS3] = XEHPC_GRDOM_BLT3,
407	[BCS4] = XEHPC_GRDOM_BLT4,
408	[BCS5] = XEHPC_GRDOM_BLT5,
409	[BCS6] = XEHPC_GRDOM_BLT6,
410	[BCS7] = XEHPC_GRDOM_BLT7,
411	[BCS8] = XEHPC_GRDOM_BLT8,
412	[VCS0] = GEN11_GRDOM_MEDIA,
413	[VCS1] = GEN11_GRDOM_MEDIA2,
414	[VCS2] = GEN11_GRDOM_MEDIA3,
415	[VCS3] = GEN11_GRDOM_MEDIA4,
416	[VCS4] = GEN11_GRDOM_MEDIA5,
417	[VCS5] = GEN11_GRDOM_MEDIA6,
418	[VCS6] = GEN11_GRDOM_MEDIA7,
419	[VCS7] = GEN11_GRDOM_MEDIA8,
420	[VECS0] = GEN11_GRDOM_VECS,
421	[VECS1] = GEN11_GRDOM_VECS2,
422	[VECS2] = GEN11_GRDOM_VECS3,
423	[VECS3] = GEN11_GRDOM_VECS4,
424	[CCS0] = GEN11_GRDOM_RENDER,
425	[CCS1] = GEN11_GRDOM_RENDER,
426	[CCS2] = GEN11_GRDOM_RENDER,
427	[CCS3] = GEN11_GRDOM_RENDER,
428	[GSC0] = GEN12_GRDOM_GSC,
429	};
430	GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) \|\|
431	!engine_reset_domains[id]);
432	reset_domain = engine_reset_domains[id];
433	} else {
434	static const u32 engine_reset_domains[] = {
435	[RCS0] = GEN6_GRDOM_RENDER,
436	[BCS0] = GEN6_GRDOM_BLT,
437	[VCS0] = GEN6_GRDOM_MEDIA,
438	[VCS1] = GEN8_GRDOM_MEDIA2,
439	[VECS0] = GEN6_GRDOM_VECS,
440	};
441	GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) \|\|
442	!engine_reset_domains[id]);
443	reset_domain = engine_reset_domains[id];
444	}
445
446	return reset_domain;
447	}
448
449	static int intel_engine_setup(struct intel_gt gt, enum* intel_engine_id id,
450	u8 logical_instance)
451	{
452	const struct engine_info *info = &intel_engines[id];
453	struct drm_i915_private *i915 = gt->i915;
454	struct intel_engine_cs *engine;
455	u8 guc_class;
456
457	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
458	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));
459	BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + `1`));
460	BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + `1`));
461
462	if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine)))
463	return -EINVAL;
464
465	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
466	return -EINVAL;
467
468	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
469	return -EINVAL;
470
471	if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance]))
472	return -EINVAL;
473
474	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
475	if (!engine)
476	return -ENOMEM;
477
478	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);
479
480	INIT_LIST_HEAD(list: &engine->pinned_contexts_list);
481	engine->id = id;
482	engine->legacy_idx = INVALID_ENGINE;
483	engine->mask = BIT(id);
484	engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915),
485	id);
486	engine->i915 = i915;
487	engine->gt = gt;
488	engine->uncore = gt->uncore;
489	guc_class = engine_class_to_guc_class(class: info->class);
490	engine->guc_id = MAKE_GUC_ID(guc_class, info->instance);
491	engine->mmio_base = __engine_mmio_base(i915, bases: info->mmio_bases);
492
493	engine->irq_handler = nop_irq_handler;
494
495	engine->class = info->class;
496	engine->instance = info->instance;
497	engine->logical_mask = BIT(logical_instance);
498	__sprint_engine_name(engine);
499
500	if ((engine->class == COMPUTE_CLASS \|\| engine->class == RENDER_CLASS) &&
501	__ffs(CCS_MASK(engine->gt) \| RCS_MASK(engine->gt)) == engine->instance)
502	engine->flags \|= I915_ENGINE_FIRST_RENDER_COMPUTE;
503
504	/ features common between engines sharing EUs /
505	if (engine->class == RENDER_CLASS \|\| engine->class == COMPUTE_CLASS) {
506	engine->flags \|= I915_ENGINE_HAS_RCS_REG_STATE;
507	engine->flags \|= I915_ENGINE_HAS_EU_PRIORITY;
508	}
509
510	engine->props.heartbeat_interval_ms =
511	CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
512	engine->props.max_busywait_duration_ns =
513	CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT;
514	engine->props.preempt_timeout_ms =
515	CONFIG_DRM_I915_PREEMPT_TIMEOUT;
516	engine->props.stop_timeout_ms =
517	CONFIG_DRM_I915_STOP_TIMEOUT;
518	engine->props.timeslice_duration_ms =
519	CONFIG_DRM_I915_TIMESLICE_DURATION;
520
521	/*
522	* Mid-thread pre-emption is not available in Gen12. Unfortunately,
523	* some compute workloads run quite long threads. That means they get
524	* reset due to not pre-empting in a timely manner. So, bump the
525	* pre-emption timeout value to be much higher for compute engines.
526	*/
527	if (GRAPHICS_VER(i915) == `12` && (engine->flags & I915_ENGINE_HAS_RCS_REG_STATE))
528	engine->props.preempt_timeout_ms = CONFIG_DRM_I915_PREEMPT_TIMEOUT_COMPUTE;
529
530	/ Cap properties according to any system limits /
531	#define CLAMP_PROP(field) \
532	do { \
533	u64 clamp = intel_clamp_##field(engine, engine->props.field); \
534	if (clamp != engine->props.field) { \
535	drm_notice(&engine->i915->drm, \
536	"Warning, clamping %s to %lld to prevent overflow\n", \
537	#field, clamp); \
538	engine->props.field = clamp; \
539	} \
540	} while (0)
541
542	CLAMP_PROP(heartbeat_interval_ms);
543	CLAMP_PROP(max_busywait_duration_ns);
544	CLAMP_PROP(preempt_timeout_ms);
545	CLAMP_PROP(stop_timeout_ms);
546	CLAMP_PROP(timeslice_duration_ms);
547
548	#undef CLAMP_PROP
549
550	engine->defaults = engine->props; / never to change again /
551
552	engine->context_size = intel_engine_context_size(gt, class: engine->class);
553	if (WARN_ON(engine->context_size > BIT(`20`)))
554	engine->context_size = `0`;
555	if (engine->context_size)
556	DRIVER_CAPS(i915)->has_logical_contexts = true;
557
558	ewma__engine_latency_init(e: &engine->latency);
559
560	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);
561
562	/ Scrub mmio state on takeover /
563	intel_engine_sanitize_mmio(engine);
564
565	gt->engine_class[info->class][info->instance] = engine;
566	gt->engine[id] = engine;
567
568	return `0`;
569	}
570
571	u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value)
572	{
573	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
574
575	return value;
576	}
577
578	u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value)
579	{
580	value = min(value, jiffies_to_nsecs(`2`));
581
582	return value;
583	}
584
585	u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value)
586	{
587	/*
588	* NB: The GuC API only supports 32bit values. However, the limit is further
589	* reduced due to internal calculations which would otherwise overflow.
590	*/
591	if (intel_guc_submission_is_wanted(guc: gt_to_guc(gt: engine->gt)))
592	value = min_t(u64, value, guc_policy_max_preempt_timeout_ms());
593
594	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
595
596	return value;
597	}
598
599	u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value)
600	{
601	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
602
603	return value;
604	}
605
606	u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value)
607	{
608	/*
609	* NB: The GuC API only supports 32bit values. However, the limit is further
610	* reduced due to internal calculations which would otherwise overflow.
611	*/
612	if (intel_guc_submission_is_wanted(guc: gt_to_guc(gt: engine->gt)))
613	value = min_t(u64, value, guc_policy_max_exec_quantum_ms());
614
615	value = min_t(u64, value, jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT));
616
617	return value;
618	}
619
620	static void __setup_engine_capabilities(struct intel_engine_cs *engine)
621	{
622	struct drm_i915_private *i915 = engine->i915;
623
624	if (engine->class == VIDEO_DECODE_CLASS) {
625	/*
626	* HEVC support is present on first engine instance
627	* before Gen11 and on all instances afterwards.
628	*/
629	if (GRAPHICS_VER(i915) >= `11` \|\|
630	(GRAPHICS_VER(i915) >= `9` && engine->instance == `0`))
631	engine->uabi_capabilities \|=
632	I915_VIDEO_CLASS_CAPABILITY_HEVC;
633
634	/*
635	* SFC block is present only on even logical engine
636	* instances.
637	*/
638	if ((GRAPHICS_VER(i915) >= `11` &&
639	(engine->gt->info.vdbox_sfc_access &
640	BIT(engine->instance))) \|\|
641	(GRAPHICS_VER(i915) >= `9` && engine->instance == `0`))
642	engine->uabi_capabilities \|=
643	I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
644	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
645	if (GRAPHICS_VER(i915) >= `9` &&
646	engine->gt->info.sfc_mask & BIT(engine->instance))
647	engine->uabi_capabilities \|=
648	I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
649	}
650	}
651
652	static void intel_setup_engine_capabilities(struct intel_gt *gt)
653	{
654	struct intel_engine_cs *engine;
655	enum intel_engine_id id;
656
657	for_each_engine(engine, gt, id)
658	__setup_engine_capabilities(engine);
659	}
660
661	/**
662	* intel_engines_release() - free the resources allocated for Command Streamers
663	* @gt: pointer to struct intel_gt
664	*/
665	void intel_engines_release(struct intel_gt *gt)
666	{
667	struct intel_engine_cs *engine;
668	enum intel_engine_id id;
669
670	/*
671	* Before we release the resources held by engine, we must be certain
672	* that the HW is no longer accessing them -- having the GPU scribble
673	* to or read from a page being used for something else causes no end
674	* of fun.
675	*
676	* The GPU should be reset by this point, but assume the worst just
677	* in case we aborted before completely initialising the engines.
678	*/
679	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
680	if (!intel_gt_gpu_reset_clobbers_display(gt))
681	intel_gt_reset_all_engines(gt);
682
683	/ Decouple the backend; but keep the layout for late GPU resets /
684	for_each_engine(engine, gt, id) {
685	if (!engine->release)
686	continue;
687
688	intel_wakeref_wait_for_idle(wf: &engine->wakeref);
689	GEM_BUG_ON(intel_engine_pm_is_awake(engine));
690
691	engine->release(engine);
692	engine->release = NULL;
693
694	memset(&engine->reset, `0`, sizeof(engine->reset));
695	}
696
697	llist_del_all(head: &gt->i915->uabi_engines_llist);
698	}
699
700	void intel_engine_free_request_pool(struct intel_engine_cs *engine)
701	{
702	if (!engine->request_pool)
703	return;
704
705	kmem_cache_free(s: i915_request_slab_cache(), objp: engine->request_pool);
706	}
707
708	void intel_engines_free(struct intel_gt *gt)
709	{
710	struct intel_engine_cs *engine;
711	enum intel_engine_id id;
712
713	/ Free the requests! dma-resv keeps fences around for an eternity /
714	rcu_barrier();
715
716	for_each_engine(engine, gt, id) {
717	intel_engine_free_request_pool(engine);
718	kfree(objp: engine);
719	gt->engine[id] = NULL;
720	}
721	}
722
723	static
724	bool gen11_vdbox_has_sfc(struct intel_gt *gt,
725	unsigned int physical_vdbox,
726	unsigned int logical_vdbox, u16 vdbox_mask)
727	{
728	struct drm_i915_private *i915 = gt->i915;
729
730	/*
731	* In Gen11, only even numbered logical VDBOXes are hooked
732	* up to an SFC (Scaler & Format Converter) unit.
733	* In Gen12, Even numbered physical instance always are connected
734	* to an SFC. Odd numbered physical instances have SFC only if
735	* previous even instance is fused off.
736	*
737	* Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
738	* in the fuse register that tells us whether a specific SFC is present.
739	*/
740	if ((gt->info.sfc_mask & BIT(physical_vdbox / `2`)) == `0`)
741	return false;
742	else if (MEDIA_VER(i915) >= `12`)
743	return (physical_vdbox % `2` == `0`) \|\|
744	!(BIT(physical_vdbox - `1`) & vdbox_mask);
745	else if (MEDIA_VER(i915) == `11`)
746	return logical_vdbox % `2` == `0`;
747
748	return false;
749	}
750
751	static void engine_mask_apply_media_fuses(struct intel_gt *gt)
752	{
753	struct drm_i915_private *i915 = gt->i915;
754	unsigned int logical_vdbox = `0`;
755	unsigned int i;
756	u32 media_fuse, fuse1;
757	u16 vdbox_mask;
758	u16 vebox_mask;
759
760	if (MEDIA_VER(gt->i915) < `11`)
761	return;
762
763	/*
764	* On newer platforms the fusing register is called 'enable' and has
765	* enable semantics, while on older platforms it is called 'disable'
766	* and bits have disable semantices.
767	*/
768	media_fuse = intel_uncore_read(uncore: gt->uncore, GEN11_GT_VEBOX_VDBOX_DISABLE);
769	if (MEDIA_VER_FULL(i915) < IP_VER(`12`, `55`))
770	media_fuse = ~media_fuse;
771
772	vdbox_mask = REG_FIELD_GET(GEN11_GT_VDBOX_DISABLE_MASK, media_fuse);
773	vebox_mask = REG_FIELD_GET(GEN11_GT_VEBOX_DISABLE_MASK, media_fuse);
774
775	if (MEDIA_VER_FULL(i915) >= IP_VER(`12`, `55`)) {
776	fuse1 = intel_uncore_read(uncore: gt->uncore, HSW_PAVP_FUSE1);
777	gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1);
778	} else {
779	gt->info.sfc_mask = ~`0`;
780	}
781
782	for (i = `0`; i < I915_MAX_VCS; i++) {
783	if (!HAS_ENGINE(gt, _VCS(i))) {
784	vdbox_mask &= ~BIT(i);
785	continue;
786	}
787
788	if (!(BIT(i) & vdbox_mask)) {
789	gt->info.engine_mask &= ~BIT(_VCS(i));
790	gt_dbg(gt, "vcs%u fused off\n", i);
791	continue;
792	}
793
794	if (gen11_vdbox_has_sfc(gt, physical_vdbox: i, logical_vdbox, vdbox_mask))
795	gt->info.vdbox_sfc_access \|= BIT(i);
796	logical_vdbox++;
797	}
798	gt_dbg(gt, "vdbox enable: %04x, instances: %04lx\n", vdbox_mask, VDBOX_MASK(gt));
799	GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt));
800
801	for (i = `0`; i < I915_MAX_VECS; i++) {
802	if (!HAS_ENGINE(gt, _VECS(i))) {
803	vebox_mask &= ~BIT(i);
804	continue;
805	}
806
807	if (!(BIT(i) & vebox_mask)) {
808	gt->info.engine_mask &= ~BIT(_VECS(i));
809	gt_dbg(gt, "vecs%u fused off\n", i);
810	}
811	}
812	gt_dbg(gt, "vebox enable: %04x, instances: %04lx\n", vebox_mask, VEBOX_MASK(gt));
813	GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt));
814	}
815
816	static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
817	{
818	struct drm_i915_private *i915 = gt->i915;
819	struct intel_gt_info *info = &gt->info;
820	int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
821	unsigned long ccs_mask;
822	unsigned int i;
823
824	if (GRAPHICS_VER(i915) < `11`)
825	return;
826
827	if (hweight32(CCS_MASK(gt)) <= `1`)
828	return;
829
830	ccs_mask = intel_slicemask_from_xehp_dssmask(dss_mask: info->sseu.compute_subslice_mask,
831	dss_per_slice: ss_per_ccs);
832	/*
833	* If all DSS in a quadrant are fused off, the corresponding CCS
834	* engine is not available for use.
835	*/
836	for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
837	info->engine_mask &= ~BIT(_CCS(i));
838	gt_dbg(gt, "ccs%u fused off\n", i);
839	}
840	}
841
842	/*
843	* Determine which engines are fused off in our particular hardware.
844	* Note that we have a catch-22 situation where we need to be able to access
845	* the blitter forcewake domain to read the engine fuses, but at the same time
846	* we need to know which engines are available on the system to know which
847	* forcewake domains are present. We solve this by initializing the forcewake
848	* domains based on the full engine mask in the platform capabilities before
849	* calling this function and pruning the domains for fused-off engines
850	* afterwards.
851	*/
852	static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
853	{
854	struct intel_gt_info *info = &gt->info;
855
856	GEM_BUG_ON(!info->engine_mask);
857
858	engine_mask_apply_media_fuses(gt);
859	engine_mask_apply_compute_fuses(gt);
860
861	/*
862	* The only use of the GSC CS is to load and communicate with the GSC
863	* FW, so we have no use for it if we don't have the FW.
864	*
865	* IMPORTANT: in cases where we don't have the GSC FW, we have a
866	* catch-22 situation that breaks media C6 due to 2 requirements:
867	* 1) once turned on, the GSC power well will not go to sleep unless the
868	* GSC FW is loaded.
869	* 2) to enable idling (which is required for media C6) we need to
870	* initialize the IDLE_MSG register for the GSC CS and do at least 1
871	* submission, which will wake up the GSC power well.
872	*/
873	if (__HAS_ENGINE(info->engine_mask, GSC0) && !intel_uc_wants_gsc_uc(uc: &gt->uc)) {
874	gt_notice(gt, "No GSC FW selected, disabling GSC CS and media C6\n");
875	info->engine_mask &= ~BIT(GSC0);
876	}
877
878	/*
879	* Do not create the command streamer for CCS slices beyond the first.
880	* All the workload submitted to the first engine will be shared among
881	* all the slices.
882	*
883	* Once the user will be allowed to customize the CCS mode, then this
884	* check needs to be removed.
885	*/
886	if (IS_DG2(gt->i915)) {
887	u8 first_ccs = __ffs(CCS_MASK(gt));
888
889	/*
890	* Store the number of active cslices before
891	* changing the CCS engine configuration
892	*/
893	gt->ccs.cslices = CCS_MASK(gt);
894
895	/ Mask off all the CCS engine /
896	info->engine_mask &= ~GENMASK(CCS3, CCS0);
897	/ Put back in the first CCS engine /
898	info->engine_mask \|= BIT(_CCS(first_ccs));
899	}
900
901	return info->engine_mask;
902	}
903
904	static void populate_logical_ids(struct intel_gt gt, u8 logical_ids,
905	u8 class, const u8 *map, u8 num_instances)
906	{
907	int i, j;
908	u8 current_logical_id = `0`;
909
910	for (j = `0`; j < num_instances; ++j) {
911	for (i = `0`; i < ARRAY_SIZE(intel_engines); ++i) {
912	if (!HAS_ENGINE(gt, i) \|\|
913	intel_engines[i].class != class)
914	continue;
915
916	if (intel_engines[i].instance == map[j]) {
917	logical_ids[intel_engines[i].instance] =
918	current_logical_id++;
919	break;
920	}
921	}
922	}
923	}
924
925	static void setup_logical_ids(struct intel_gt gt, u8 logical_ids, u8 class)
926	{
927	/*
928	* Logical to physical mapping is needed for proper support
929	* to split-frame feature.
930	*/
931	if (MEDIA_VER(gt->i915) >= `11` && class == VIDEO_DECODE_CLASS) {
932	const u8 map[] = { `0`, `2`, `4`, `6`, `1`, `3`, `5`, `7` };
933
934	populate_logical_ids(gt, logical_ids, class,
935	map, ARRAY_SIZE(map));
936	} else {
937	int i;
938	u8 map[MAX_ENGINE_INSTANCE + `1`];
939
940	for (i = `0`; i < MAX_ENGINE_INSTANCE + `1`; ++i)
941	map[i] = i;
942	populate_logical_ids(gt, logical_ids, class,
943	map, ARRAY_SIZE(map));
944	}
945	}
946
947	/**
948	* intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
949	* @gt: pointer to struct intel_gt
950	*
951	* Return: non-zero if the initialization failed.
952	*/
953	int intel_engines_init_mmio(struct intel_gt *gt)
954	{
955	struct drm_i915_private *i915 = gt->i915;
956	const unsigned int engine_mask = init_engine_mask(gt);
957	unsigned int mask = `0`;
958	unsigned int i, class;
959	u8 logical_ids[MAX_ENGINE_INSTANCE + `1`];
960	int err;
961
962	drm_WARN_ON(&i915->drm, engine_mask == `0`);
963	drm_WARN_ON(&i915->drm, engine_mask &
964	GENMASK(BITS_PER_TYPE(mask) - `1`, I915_NUM_ENGINES));
965
966	if (i915_inject_probe_failure(i915))
967	return -ENODEV;
968
969	for (class = `0`; class < MAX_ENGINE_CLASS + `1`; ++class) {
970	setup_logical_ids(gt, logical_ids, class);
971
972	for (i = `0`; i < ARRAY_SIZE(intel_engines); ++i) {
973	u8 instance = intel_engines[i].instance;
974
975	if (intel_engines[i].class != class \|\|
976	!HAS_ENGINE(gt, i))
977	continue;
978
979	err = intel_engine_setup(gt, id: i,
980	logical_instance: logical_ids[instance]);
981	if (err)
982	goto cleanup;
983
984	mask \|= BIT(i);
985	}
986	}
987
988	/*
989	* Catch failures to update intel_engines table when the new engines
990	* are added to the driver by a warning and disabling the forgotten
991	* engines.
992	*/
993	if (drm_WARN_ON(&i915->drm, mask != engine_mask))
994	gt->info.engine_mask = mask;
995
996	gt->info.num_engines = hweight32(mask);
997
998	intel_gt_check_and_clear_faults(gt);
999
1000	intel_setup_engine_capabilities(gt);
1001
1002	intel_uncore_prune_engine_fw_domains(uncore: gt->uncore, gt);
1003
1004	return `0`;
1005
1006	cleanup:
1007	intel_engines_free(gt);
1008	return err;
1009	}
1010
1011	void intel_engine_init_execlists(struct intel_engine_cs *engine)
1012	{
1013	struct intel_engine_execlists * const execlists = &engine->execlists;
1014
1015	execlists->port_mask = `1`;
1016	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
1017	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);
1018
1019	memset(execlists->pending, `0`, sizeof(execlists->pending));
1020	execlists->active =
1021	memset(execlists->inflight, `0`, sizeof(execlists->inflight));
1022	}
1023
1024	static void cleanup_status_page(struct intel_engine_cs *engine)
1025	{
1026	struct i915_vma *vma;
1027
1028	/ Prevent writes into HWSP after returning the page to the system /
1029	intel_engine_set_hwsp_writemask(engine, mask: ~`0u`);
1030
1031	vma = fetch_and_zero(&engine->status_page.vma);
1032	if (!vma)
1033	return;
1034
1035	if (!HWS_NEEDS_PHYSICAL(engine->i915))
1036	i915_vma_unpin(vma);
1037
1038	i915_gem_object_unpin_map(obj: vma->obj);
1039	i915_gem_object_put(obj: vma->obj);
1040	}
1041
1042	static int pin_ggtt_status_page(struct intel_engine_cs *engine,
1043	struct i915_gem_ww_ctx *ww,
1044	struct i915_vma *vma)
1045	{
1046	unsigned int flags;
1047
1048	if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(ggtt: engine->gt->ggtt))
1049	/*
1050	* On g33, we cannot place HWS above 256MiB, so
1051	* restrict its pinning to the low mappable arena.
1052	* Though this restriction is not documented for
1053	* gen4, gen5, or byt, they also behave similarly
1054	* and hang if the HWS is placed at the top of the
1055	* GTT. To generalise, it appears that all !llc
1056	* platforms have issues with us placing the HWS
1057	* above the mappable region (even though we never
1058	* actually map it).
1059	*/
1060	flags = PIN_MAPPABLE;
1061	else
1062	flags = PIN_HIGH;
1063
1064	return i915_ggtt_pin(vma, ww, align: `0`, flags);
1065	}
1066
1067	static int init_status_page(struct intel_engine_cs *engine)
1068	{
1069	struct drm_i915_gem_object *obj;
1070	struct i915_gem_ww_ctx ww;
1071	struct i915_vma *vma;
1072	void *vaddr;
1073	int ret;
1074
1075	INIT_LIST_HEAD(list: &engine->status_page.timelines);
1076
1077	/*
1078	* Though the HWS register does support 36bit addresses, historically
1079	* we have had hangs and corruption reported due to wild writes if
1080	* the HWS is placed above 4G. We only allow objects to be allocated
1081	* in GFP_DMA32 for i965, and no earlier physical address users had
1082	* access to more than 4G.
1083	*/
1084	obj = i915_gem_object_create_internal(i915: engine->i915, PAGE_SIZE);
1085	if (IS_ERR(ptr: obj)) {
1086	gt_err(engine->gt, "Failed to allocate status page\n");
1087	return PTR_ERR(ptr: obj);
1088	}
1089
1090	i915_gem_object_set_cache_coherency(obj, cache_level: I915_CACHE_LLC);
1091
1092	vma = i915_vma_instance(obj, vm: &engine->gt->ggtt->vm, NULL);
1093	if (IS_ERR(ptr: vma)) {
1094	ret = PTR_ERR(ptr: vma);
1095	goto err_put;
1096	}
1097
1098	i915_gem_ww_ctx_init(ctx: &ww, intr: true);
1099	retry:
1100	ret = i915_gem_object_lock(obj, ww: &ww);
1101	if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915))
1102	ret = pin_ggtt_status_page(engine, ww: &ww, vma);
1103	if (ret)
1104	goto err;
1105
1106	vaddr = i915_gem_object_pin_map(obj, type: I915_MAP_WB);
1107	if (IS_ERR(ptr: vaddr)) {
1108	ret = PTR_ERR(ptr: vaddr);
1109	goto err_unpin;
1110	}
1111
1112	engine->status_page.addr = memset(vaddr, `0`, PAGE_SIZE);
1113	engine->status_page.vma = vma;
1114
1115	err_unpin:
1116	if (ret)
1117	i915_vma_unpin(vma);
1118	err:
1119	if (ret == -EDEADLK) {
1120	ret = i915_gem_ww_ctx_backoff(ctx: &ww);
1121	if (!ret)
1122	goto retry;
1123	}
1124	i915_gem_ww_ctx_fini(ctx: &ww);
1125	err_put:
1126	if (ret)
1127	i915_gem_object_put(obj);
1128	return ret;
1129	}
1130
1131	static int intel_engine_init_tlb_invalidation(struct intel_engine_cs *engine)
1132	{
1133	static const union intel_engine_tlb_inv_reg gen8_regs[] = {
1134	[RENDER_CLASS].reg = GEN8_RTCR,
1135	[VIDEO_DECODE_CLASS].reg = GEN8_M1TCR, / , GEN8_M2TCR /
1136	[VIDEO_ENHANCEMENT_CLASS].reg = GEN8_VTCR,
1137	[COPY_ENGINE_CLASS].reg = GEN8_BTCR,
1138	};
1139	static const union intel_engine_tlb_inv_reg gen12_regs[] = {
1140	[RENDER_CLASS].reg = GEN12_GFX_TLB_INV_CR,
1141	[VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
1142	[VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
1143	[COPY_ENGINE_CLASS].reg = GEN12_BLT_TLB_INV_CR,
1144	[COMPUTE_CLASS].reg = GEN12_COMPCTX_TLB_INV_CR,
1145	};
1146	static const union intel_engine_tlb_inv_reg xehp_regs[] = {
1147	[RENDER_CLASS].mcr_reg = XEHP_GFX_TLB_INV_CR,
1148	[VIDEO_DECODE_CLASS].mcr_reg = XEHP_VD_TLB_INV_CR,
1149	[VIDEO_ENHANCEMENT_CLASS].mcr_reg = XEHP_VE_TLB_INV_CR,
1150	[COPY_ENGINE_CLASS].mcr_reg = XEHP_BLT_TLB_INV_CR,
1151	[COMPUTE_CLASS].mcr_reg = XEHP_COMPCTX_TLB_INV_CR,
1152	};
1153	static const union intel_engine_tlb_inv_reg xelpmp_regs[] = {
1154	[VIDEO_DECODE_CLASS].reg = GEN12_VD_TLB_INV_CR,
1155	[VIDEO_ENHANCEMENT_CLASS].reg = GEN12_VE_TLB_INV_CR,
1156	[OTHER_CLASS].reg = XELPMP_GSC_TLB_INV_CR,
1157	};
1158	struct drm_i915_private *i915 = engine->i915;
1159	const unsigned int instance = engine->instance;
1160	const unsigned int class = engine->class;
1161	const union intel_engine_tlb_inv_reg *regs;
1162	union intel_engine_tlb_inv_reg reg;
1163	unsigned int num = `0`;
1164	u32 val;
1165
1166	/*
1167	* New platforms should not be added with catch-all-newer (>=)
1168	* condition so that any later platform added triggers the below warning
1169	* and in turn mandates a human cross-check of whether the invalidation
1170	* flows have compatible semantics.
1171	*
1172	* For instance with the 11.00 -> 12.00 transition three out of five
1173	* respective engine registers were moved to masked type. Then after the
1174	* 12.00 -> 12.50 transition multi cast handling is required too.
1175	*/
1176
1177	if (engine->gt->type == GT_MEDIA) {
1178	if (MEDIA_VER_FULL(i915) == IP_VER(`13`, `0`)) {
1179	regs = xelpmp_regs;
1180	num = ARRAY_SIZE(xelpmp_regs);
1181	}
1182	} else {
1183	if (GRAPHICS_VER_FULL(i915) == IP_VER(`12`, `74`) \|\|
1184	GRAPHICS_VER_FULL(i915) == IP_VER(`12`, `71`) \|\|
1185	GRAPHICS_VER_FULL(i915) == IP_VER(`12`, `70`) \|\|
1186	GRAPHICS_VER_FULL(i915) == IP_VER(`12`, `55`)) {
1187	regs = xehp_regs;
1188	num = ARRAY_SIZE(xehp_regs);
1189	} else if (GRAPHICS_VER_FULL(i915) == IP_VER(`12`, `0`) \|\|
1190	GRAPHICS_VER_FULL(i915) == IP_VER(`12`, `10`)) {
1191	regs = gen12_regs;
1192	num = ARRAY_SIZE(gen12_regs);
1193	} else if (GRAPHICS_VER(i915) >= `8` && GRAPHICS_VER(i915) <= `11`) {
1194	regs = gen8_regs;
1195	num = ARRAY_SIZE(gen8_regs);
1196	} else if (GRAPHICS_VER(i915) < `8`) {
1197	return `0`;
1198	}
1199	}
1200
1201	if (gt_WARN_ONCE(engine->gt, !num,
1202	"Platform does not implement TLB invalidation!"))
1203	return -ENODEV;
1204
1205	if (gt_WARN_ON_ONCE(engine->gt,
1206	class >= num \|\|
1207	(!regs[class].reg.reg &&
1208	!regs[class].mcr_reg.reg)))
1209	return -ERANGE;
1210
1211	reg = regs[class];
1212
1213	if (regs == xelpmp_regs && class == OTHER_CLASS) {
1214	/*
1215	* There's only a single GSC instance, but it uses register bit
1216	* 1 instead of either 0 or OTHER_GSC_INSTANCE.
1217	*/
1218	GEM_WARN_ON(instance != OTHER_GSC_INSTANCE);
1219	val = `1`;
1220	} else if (regs == gen8_regs && class == VIDEO_DECODE_CLASS && instance == `1`) {
1221	reg.reg = GEN8_M2TCR;
1222	val = `0`;
1223	} else {
1224	val = instance;
1225	}
1226
1227	val = BIT(val);
1228
1229	engine->tlb_inv.mcr = regs == xehp_regs;
1230	engine->tlb_inv.reg = reg;
1231	engine->tlb_inv.done = val;
1232
1233	if (GRAPHICS_VER(i915) >= `12` &&
1234	(engine->class == VIDEO_DECODE_CLASS \|\|
1235	engine->class == VIDEO_ENHANCEMENT_CLASS \|\|
1236	engine->class == COMPUTE_CLASS \|\|
1237	engine->class == OTHER_CLASS))
1238	engine->tlb_inv.request = _MASKED_BIT_ENABLE(val);
1239	else
1240	engine->tlb_inv.request = val;
1241
1242	return `0`;
1243	}
1244
1245	static int engine_setup_common(struct intel_engine_cs *engine)
1246	{
1247	int err;
1248
1249	init_llist_head(list: &engine->barrier_tasks);
1250
1251	err = intel_engine_init_tlb_invalidation(engine);
1252	if (err)
1253	return err;
1254
1255	err = init_status_page(engine);
1256	if (err)
1257	return err;
1258
1259	engine->breadcrumbs = intel_breadcrumbs_create(irq_engine: engine);
1260	if (!engine->breadcrumbs) {
1261	err = -ENOMEM;
1262	goto err_status;
1263	}
1264
1265	engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL);
1266	if (!engine->sched_engine) {
1267	err = -ENOMEM;
1268	goto err_sched_engine;
1269	}
1270	engine->sched_engine->private_data = engine;
1271
1272	err = intel_engine_init_cmd_parser(engine);
1273	if (err)
1274	goto err_cmd_parser;
1275
1276	intel_engine_init_execlists(engine);
1277	intel_engine_init__pm(engine);
1278	intel_engine_init_retire(engine);
1279
1280	/ Use the whole device by default /
1281	engine->sseu =
1282	intel_sseu_from_device_info(sseu: &engine->gt->info.sseu);
1283
1284	intel_engine_init_workarounds(engine);
1285	intel_engine_init_whitelist(engine);
1286	intel_engine_init_ctx_wa(engine);
1287
1288	if (GRAPHICS_VER(engine->i915) >= `12`)
1289	engine->flags \|= I915_ENGINE_HAS_RELATIVE_MMIO;
1290
1291	return `0`;
1292
1293	err_cmd_parser:
1294	i915_sched_engine_put(sched_engine: engine->sched_engine);
1295	err_sched_engine:
1296	intel_breadcrumbs_put(b: engine->breadcrumbs);
1297	err_status:
1298	cleanup_status_page(engine);
1299	return err;
1300	}
1301
1302	struct measure_breadcrumb {
1303	struct i915_request rq;
1304	struct intel_ring ring;
1305	u32 cs[`2048`];
1306	};
1307
1308	static int measure_breadcrumb_dw(struct intel_context *ce)
1309	{
1310	struct intel_engine_cs *engine = ce->engine;
1311	struct measure_breadcrumb *frame;
1312	int dw;
1313
1314	GEM_BUG_ON(!engine->gt->scratch);
1315
1316	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
1317	if (!frame)
1318	return -ENOMEM;
1319
1320	frame->rq.i915 = engine->i915;
1321	frame->rq.engine = engine;
1322	frame->rq.context = ce;
1323	rcu_assign_pointer(frame->rq.timeline, ce->timeline);
1324	frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno;
1325
1326	frame->ring.vaddr = frame->cs;
1327	frame->ring.size = sizeof(frame->cs);
1328	frame->ring.wrap =
1329	BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
1330	frame->ring.effective_size = frame->ring.size;
1331	intel_ring_update_space(ring: &frame->ring);
1332	frame->rq.ring = &frame->ring;
1333
1334	mutex_lock(&ce->timeline->mutex);
1335	spin_lock_irq(lock: &engine->sched_engine->lock);
1336
1337	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
1338
1339	spin_unlock_irq(lock: &engine->sched_engine->lock);
1340	mutex_unlock(lock: &ce->timeline->mutex);
1341
1342	GEM_BUG_ON(dw & `1`); / RING_TAIL must be qword aligned /
1343
1344	kfree(objp: frame);
1345	return dw;
1346	}
1347
1348	struct intel_context *
1349	intel_engine_create_pinned_context(struct intel_engine_cs *engine,
1350	struct i915_address_space *vm,
1351	unsigned int ring_size,
1352	unsigned int hwsp,
1353	struct lock_class_key *key,
1354	const char *name)
1355	{
1356	struct intel_context *ce;
1357	int err;
1358
1359	ce = intel_context_create(engine);
1360	if (IS_ERR(ptr: ce))
1361	return ce;
1362
1363	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
1364	ce->timeline = page_pack_bits(NULL, hwsp);
1365	ce->ring = NULL;
1366	ce->ring_size = ring_size;
1367
1368	i915_vm_put(vm: ce->vm);
1369	ce->vm = i915_vm_get(vm);
1370
1371	err = intel_context_pin(ce); / perma-pin so it is always available /
1372	if (err) {
1373	intel_context_put(ce);
1374	return ERR_PTR(error: err);
1375	}
1376
1377	list_add_tail(new: &ce->pinned_contexts_link, head: &engine->pinned_contexts_list);
1378
1379	/*
1380	* Give our perma-pinned kernel timelines a separate lockdep class,
1381	* so that we can use them from within the normal user timelines
1382	* should we need to inject GPU operations during their request
1383	* construction.
1384	*/
1385	lockdep_set_class_and_name(&ce->timeline->mutex, key, name);
1386
1387	return ce;
1388	}
1389
1390	void intel_engine_destroy_pinned_context(struct intel_context *ce)
1391	{
1392	struct intel_engine_cs *engine = ce->engine;
1393	struct i915_vma *hwsp = engine->status_page.vma;
1394
1395	GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);
1396
1397	mutex_lock(&hwsp->vm->mutex);
1398	list_del(entry: &ce->timeline->engine_link);
1399	mutex_unlock(lock: &hwsp->vm->mutex);
1400
1401	list_del(entry: &ce->pinned_contexts_link);
1402	intel_context_unpin(ce);
1403	intel_context_put(ce);
1404	}
1405
1406	static struct intel_context *
1407	create_ggtt_bind_context(struct intel_engine_cs *engine)
1408	{
1409	static struct lock_class_key kernel;
1410
1411	/*
1412	* MI_UPDATE_GTT can insert up to 511 PTE entries and there could be multiple
1413	* bind requests at a time so get a bigger ring.
1414	*/
1415	return intel_engine_create_pinned_context(engine, vm: engine->gt->vm, SZ_512K,
1416	I915_GEM_HWS_GGTT_BIND_ADDR,
1417	key: &kernel, name: "ggtt_bind_context");
1418	}
1419
1420	static struct intel_context *
1421	create_kernel_context(struct intel_engine_cs *engine)
1422	{
1423	static struct lock_class_key kernel;
1424
1425	return intel_engine_create_pinned_context(engine, vm: engine->gt->vm, SZ_4K,
1426	I915_GEM_HWS_SEQNO_ADDR,
1427	key: &kernel, name: "kernel_context");
1428	}
1429
1430	/*
1431	* engine_init_common - initialize engine state which might require hw access
1432	* @engine: Engine to initialize.
1433	*
1434	* Initializes @engine@ structure members shared between legacy and execlists
1435	* submission modes which do require hardware access.
1436	*
1437	* Typcally done at later stages of submission mode specific engine setup.
1438	*
1439	* Returns zero on success or an error code on failure.
1440	*/
1441	static int engine_init_common(struct intel_engine_cs *engine)
1442	{
1443	struct intel_context ce, bce = NULL;
1444	int ret;
1445
1446	engine->set_default_submission(engine);
1447
1448	/*
1449	* We may need to do things with the shrinker which
1450	* require us to immediately switch back to the default
1451	* context. This can cause a problem as pinning the
1452	* default context also requires GTT space which may not
1453	* be available. To avoid this we always pin the default
1454	* context.
1455	*/
1456	ce = create_kernel_context(engine);
1457	if (IS_ERR(ptr: ce))
1458	return PTR_ERR(ptr: ce);
1459	/*
1460	* Create a separate pinned context for GGTT update with blitter engine
1461	* if a platform require such service. MI_UPDATE_GTT works on other
1462	* engines as well but BCS should be less busy engine so pick that for
1463	* GGTT updates.
1464	*/
1465	if (i915_ggtt_require_binder(i915: engine->i915) && engine->id == BCS0) {
1466	bce = create_ggtt_bind_context(engine);
1467	if (IS_ERR(ptr: bce)) {
1468	ret = PTR_ERR(ptr: bce);
1469	goto err_ce_context;
1470	}
1471	}
1472
1473	ret = measure_breadcrumb_dw(ce);
1474	if (ret < `0`)
1475	goto err_bce_context;
1476
1477	engine->emit_fini_breadcrumb_dw = ret;
1478	engine->kernel_context = ce;
1479	engine->bind_context = bce;
1480
1481	return `0`;
1482
1483	err_bce_context:
1484	if (bce)
1485	intel_engine_destroy_pinned_context(ce: bce);
1486	err_ce_context:
1487	intel_engine_destroy_pinned_context(ce);
1488	return ret;
1489	}
1490
1491	int intel_engines_init(struct intel_gt *gt)
1492	{
1493	int (setup)(struct* intel_engine_cs *engine);
1494	struct intel_engine_cs *engine;
1495	enum intel_engine_id id;
1496	int err;
1497
1498	if (intel_uc_uses_guc_submission(uc: &gt->uc)) {
1499	gt->submission_method = INTEL_SUBMISSION_GUC;
1500	setup = intel_guc_submission_setup;
1501	} else if (HAS_EXECLISTS(gt->i915)) {
1502	gt->submission_method = INTEL_SUBMISSION_ELSP;
1503	setup = intel_execlists_submission_setup;
1504	} else {
1505	gt->submission_method = INTEL_SUBMISSION_RING;
1506	setup = intel_ring_submission_setup;
1507	}
1508
1509	for_each_engine(engine, gt, id) {
1510	err = engine_setup_common(engine);
1511	if (err)
1512	return err;
1513
1514	err = setup(engine);
1515	if (err) {
1516	intel_engine_cleanup_common(engine);
1517	return err;
1518	}
1519
1520	/ The backend should now be responsible for cleanup /
1521	GEM_BUG_ON(engine->release == NULL);
1522
1523	err = engine_init_common(engine);
1524	if (err)
1525	return err;
1526
1527	intel_engine_add_user(engine);
1528	}
1529
1530	return `0`;
1531	}
1532
1533	/**
1534	* intel_engine_cleanup_common - cleans up the engine state created by
1535	* the common initializers.
1536	* @engine: Engine to cleanup.
1537	*
1538	* This cleans up everything created by the common helpers.
1539	*/
1540	void intel_engine_cleanup_common(struct intel_engine_cs *engine)
1541	{
1542	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));
1543
1544	i915_sched_engine_put(sched_engine: engine->sched_engine);
1545	intel_breadcrumbs_put(b: engine->breadcrumbs);
1546
1547	intel_engine_fini_retire(engine);
1548	intel_engine_cleanup_cmd_parser(engine);
1549
1550	if (engine->default_state)
1551	fput(engine->default_state);
1552
1553	if (engine->kernel_context)
1554	intel_engine_destroy_pinned_context(ce: engine->kernel_context);
1555
1556	if (engine->bind_context)
1557	intel_engine_destroy_pinned_context(ce: engine->bind_context);
1558
1559
1560	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
1561	cleanup_status_page(engine);
1562
1563	intel_wa_list_free(wal: &engine->ctx_wa_list);
1564	intel_wa_list_free(wal: &engine->wa_list);
1565	intel_wa_list_free(wal: &engine->whitelist);
1566	}
1567
1568	/**
1569	* intel_engine_resume - re-initializes the HW state of the engine
1570	* @engine: Engine to resume.
1571	*
1572	* Returns zero on success or an error code on failure.
1573	*/
1574	int intel_engine_resume(struct intel_engine_cs *engine)
1575	{
1576	intel_engine_apply_workarounds(engine);
1577	intel_engine_apply_whitelist(engine);
1578
1579	return engine->resume(engine);
1580	}
1581
1582	u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
1583	{
1584	struct drm_i915_private *i915 = engine->i915;
1585
1586	u64 acthd;
1587
1588	if (GRAPHICS_VER(i915) >= `8`)
1589	acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
1590	else if (GRAPHICS_VER(i915) >= `4`)
1591	acthd = ENGINE_READ(engine, RING_ACTHD);
1592	else
1593	acthd = ENGINE_READ(engine, ACTHD);
1594
1595	return acthd;
1596	}
1597
1598	u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
1599	{
1600	u64 bbaddr;
1601
1602	if (GRAPHICS_VER(engine->i915) >= `8`)
1603	bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
1604	else
1605	bbaddr = ENGINE_READ(engine, RING_BBADDR);
1606
1607	return bbaddr;
1608	}
1609
1610	static unsigned long stop_timeout(const struct intel_engine_cs *engine)
1611	{
1612	if (in_atomic() \|\| irqs_disabled()) / inside atomic preempt-reset? /
1613	return `0`;
1614
1615	/*
1616	* If we are doing a normal GPU reset, we can take our time and allow
1617	* the engine to quiesce. We've stopped submission to the engine, and
1618	* if we wait long enough an innocent context should complete and
1619	* leave the engine idle. So they should not be caught unaware by
1620	* the forthcoming GPU reset (which usually follows the stop_cs)!
1621	*/
1622	return READ_ONCE(engine->props.stop_timeout_ms);
1623	}
1624
1625	static int __intel_engine_stop_cs(struct intel_engine_cs *engine,
1626	int fast_timeout_us,
1627	int slow_timeout_ms)
1628	{
1629	struct intel_uncore *uncore = engine->uncore;
1630	const i915_reg_t mode = RING_MI_MODE(engine->mmio_base);
1631	int err;
1632
1633	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
1634
1635	/*
1636	* Wa_22011802037: Prior to doing a reset, ensure CS is
1637	* stopped, set ring stop bit and prefetch disable bit to halt CS
1638	*/
1639	if (intel_engine_reset_needs_wa_22011802037(gt: engine->gt))
1640	intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
1641	_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
1642
1643	err = __intel_wait_for_register_fw(uncore: engine->uncore, reg: mode,
1644	MODE_IDLE, MODE_IDLE,
1645	fast_timeout_us,
1646	slow_timeout_ms,
1647	NULL);
1648
1649	/ A final mmio read to let GPU writes be hopefully flushed to memory /
1650	intel_uncore_posting_read_fw(uncore, mode);
1651	return err;
1652	}
1653
1654	int intel_engine_stop_cs(struct intel_engine_cs *engine)
1655	{
1656	int err = `0`;
1657
1658	if (GRAPHICS_VER(engine->i915) < `3`)
1659	return -ENODEV;
1660
1661	ENGINE_TRACE(engine, "\n");
1662	/*
1663	* TODO: Find out why occasionally stopping the CS times out. Seen
1664	* especially with gem_eio tests.
1665	*
1666	* Occasionally trying to stop the cs times out, but does not adversely
1667	* affect functionality. The timeout is set as a config parameter that
1668	* defaults to 100ms. In most cases the follow up operation is to wait
1669	* for pending MI_FORCE_WAKES. The assumption is that this timeout is
1670	* sufficient for any pending MI_FORCEWAKEs to complete. Once root
1671	* caused, the caller must check and handle the return from this
1672	* function.
1673	*/
1674	if (__intel_engine_stop_cs(engine, fast_timeout_us: `1000`, slow_timeout_ms: stop_timeout(engine))) {
1675	ENGINE_TRACE(engine,
1676	"timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
1677	ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR,
1678	ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR);
1679
1680	/*
1681	* Sometimes we observe that the idle flag is not
1682	* set even though the ring is empty. So double
1683	* check before giving up.
1684	*/
1685	if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) !=
1686	(ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR))
1687	err = -ETIMEDOUT;
1688	}
1689
1690	return err;
1691	}
1692
1693	void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
1694	{
1695	ENGINE_TRACE(engine, "\n");
1696
1697	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
1698	}
1699
1700	static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
1701	{
1702	static const i915_reg_t _reg[I915_NUM_ENGINES] = {
1703	[RCS0] = MSG_IDLE_CS,
1704	[BCS0] = MSG_IDLE_BCS,
1705	[VCS0] = MSG_IDLE_VCS0,
1706	[VCS1] = MSG_IDLE_VCS1,
1707	[VCS2] = MSG_IDLE_VCS2,
1708	[VCS3] = MSG_IDLE_VCS3,
1709	[VCS4] = MSG_IDLE_VCS4,
1710	[VCS5] = MSG_IDLE_VCS5,
1711	[VCS6] = MSG_IDLE_VCS6,
1712	[VCS7] = MSG_IDLE_VCS7,
1713	[VECS0] = MSG_IDLE_VECS0,
1714	[VECS1] = MSG_IDLE_VECS1,
1715	[VECS2] = MSG_IDLE_VECS2,
1716	[VECS3] = MSG_IDLE_VECS3,
1717	[CCS0] = MSG_IDLE_CS,
1718	[CCS1] = MSG_IDLE_CS,
1719	[CCS2] = MSG_IDLE_CS,
1720	[CCS3] = MSG_IDLE_CS,
1721	};
1722	u32 val;
1723
1724	if (!_reg[engine->id].reg)
1725	return `0`;
1726
1727	val = intel_uncore_read(uncore: engine->uncore, reg: _reg[engine->id]);
1728
1729	/ bits[29:25] & bits[13:9] >> shift /
1730	return (val & (val >> `16`) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
1731	}
1732
1733	static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
1734	{
1735	int ret;
1736
1737	/ Ensure GPM receives fw up/down after CS is stopped /
1738	udelay(usec: `1`);
1739
1740	/ Wait for forcewake request to complete in GPM /
1741	ret = __intel_wait_for_register_fw(uncore: gt->uncore,
1742	GEN9_PWRGT_DOMAIN_STATUS,
1743	mask: fw_mask, value: fw_mask, fast_timeout_us: `5000`, slow_timeout_ms: `0`, NULL);
1744
1745	/ Ensure CS receives fw ack from GPM /
1746	udelay(usec: `1`);
1747
1748	if (ret)
1749	GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
1750	}
1751
1752	/*
1753	* Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
1754	* pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
1755	* pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
1756	* MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
1757	* are concerned only with the gt reset here, we use a logical OR of pending
1758	* forcewakeups from all reset domains and then wait for them to complete by
1759	* querying PWRGT_DOMAIN_STATUS.
1760	*/
1761	void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
1762	{
1763	u32 fw_pending = __cs_pending_mi_force_wakes(engine);
1764
1765	if (fw_pending)
1766	__gpm_wait_for_fw_complete(gt: engine->gt, fw_mask: fw_pending);
1767	}
1768
1769	/ NB: please notice the memset /
1770	void intel_engine_get_instdone(const struct intel_engine_cs *engine,
1771	struct intel_instdone *instdone)
1772	{
1773	struct drm_i915_private *i915 = engine->i915;
1774	struct intel_uncore *uncore = engine->uncore;
1775	u32 mmio_base = engine->mmio_base;
1776	int slice;
1777	int subslice;
1778	int iter;
1779
1780	memset(instdone, `0`, sizeof(*instdone));
1781
1782	if (GRAPHICS_VER(i915) >= `8`) {
1783	instdone->instdone =
1784	intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1785
1786	if (engine->id != RCS0)
1787	return;
1788
1789	instdone->slice_common =
1790	intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1791	if (GRAPHICS_VER(i915) >= `12`) {
1792	instdone->slice_common_extra[`0`] =
1793	intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA);
1794	instdone->slice_common_extra[`1`] =
1795	intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2);
1796	}
1797
1798	for_each_ss_steering(iter, engine->gt, slice, subslice) {
1799	instdone->sampler[slice][subslice] =
1800	intel_gt_mcr_read(gt: engine->gt,
1801	GEN8_SAMPLER_INSTDONE,
1802	group: slice, instance: subslice);
1803	instdone->row[slice][subslice] =
1804	intel_gt_mcr_read(gt: engine->gt,
1805	GEN8_ROW_INSTDONE,
1806	group: slice, instance: subslice);
1807	}
1808
1809	if (GRAPHICS_VER_FULL(i915) >= IP_VER(`12`, `55`)) {
1810	for_each_ss_steering(iter, engine->gt, slice, subslice)
1811	instdone->geom_svg[slice][subslice] =
1812	intel_gt_mcr_read(gt: engine->gt,
1813	XEHPG_INSTDONE_GEOM_SVG,
1814	group: slice, instance: subslice);
1815	}
1816	} else if (GRAPHICS_VER(i915) >= `7`) {
1817	instdone->instdone =
1818	intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1819
1820	if (engine->id != RCS0)
1821	return;
1822
1823	instdone->slice_common =
1824	intel_uncore_read(uncore, GEN7_SC_INSTDONE);
1825	instdone->sampler[`0`][`0`] =
1826	intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
1827	instdone->row[`0`][`0`] =
1828	intel_uncore_read(uncore, GEN7_ROW_INSTDONE);
1829	} else if (GRAPHICS_VER(i915) >= `4`) {
1830	instdone->instdone =
1831	intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
1832	if (engine->id == RCS0)
1833	/ HACK: Using the wrong struct member /
1834	instdone->slice_common =
1835	intel_uncore_read(uncore, GEN4_INSTDONE1);
1836	} else {
1837	instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
1838	}
1839	}
1840
1841	static bool ring_is_idle(struct intel_engine_cs *engine)
1842	{
1843	bool idle = true;
1844
1845	if (I915_SELFTEST_ONLY(!engine->mmio_base))
1846	return true;
1847
1848	if (!intel_engine_pm_get_if_awake(engine))
1849	return true;
1850
1851	/ First check that no commands are left in the ring /
1852	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
1853	(ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
1854	idle = false;
1855
1856	/ No bit for gen2, so assume the CS parser is idle /
1857	if (GRAPHICS_VER(engine->i915) > `2` &&
1858	!(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
1859	idle = false;
1860
1861	intel_engine_pm_put(engine);
1862
1863	return idle;
1864	}
1865
1866	void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
1867	{
1868	struct tasklet_struct *t = &engine->sched_engine->tasklet;
1869
1870	if (!t->callback)
1871	return;
1872
1873	local_bh_disable();
1874	if (tasklet_trylock(t)) {
1875	/ Must wait for any GPU reset in progress. /
1876	if (__tasklet_is_enabled(t))
1877	t->callback(t);
1878	tasklet_unlock(t);
1879	}
1880	local_bh_enable();
1881
1882	/ Synchronise and wait for the tasklet on another CPU /
1883	if (sync)
1884	tasklet_unlock_wait(t);
1885	}
1886
1887	/**
1888	* intel_engine_is_idle() - Report if the engine has finished process all work
1889	* @engine: the intel_engine_cs
1890	*
1891	* Return true if there are no requests pending, nothing left to be submitted
1892	* to hardware, and that the engine is idle.
1893	*/
1894	bool intel_engine_is_idle(struct intel_engine_cs *engine)
1895	{
1896	/ More white lies, if wedged, hw state is inconsistent /
1897	if (intel_gt_is_wedged(gt: engine->gt))
1898	return true;
1899
1900	if (!intel_engine_pm_is_awake(engine))
1901	return true;
1902
1903	/ Waiting to drain ELSP? /
1904	intel_synchronize_hardirq(i915: engine->i915);
1905	intel_engine_flush_submission(engine);
1906
1907	/ ELSP is empty, but there are ready requests? E.g. after reset /
1908	if (!i915_sched_engine_is_empty(sched_engine: engine->sched_engine))
1909	return false;
1910
1911	/ Ring stopped? /
1912	return ring_is_idle(engine);
1913	}
1914
1915	bool intel_engines_are_idle(struct intel_gt *gt)
1916	{
1917	struct intel_engine_cs *engine;
1918	enum intel_engine_id id;
1919
1920	/*
1921	* If the driver is wedged, HW state may be very inconsistent and
1922	* report that it is still busy, even though we have stopped using it.
1923	*/
1924	if (intel_gt_is_wedged(gt))
1925	return true;
1926
1927	/ Already parked (and passed an idleness test); must still be idle /
1928	if (!READ_ONCE(gt->awake))
1929	return true;
1930
1931	for_each_engine(engine, gt, id) {
1932	if (!intel_engine_is_idle(engine))
1933	return false;
1934	}
1935
1936	return true;
1937	}
1938
1939	bool intel_engine_irq_enable(struct intel_engine_cs *engine)
1940	{
1941	if (!engine->irq_enable)
1942	return false;
1943
1944	/ Caller disables interrupts /
1945	spin_lock(lock: engine->gt->irq_lock);
1946	engine->irq_enable(engine);
1947	spin_unlock(lock: engine->gt->irq_lock);
1948
1949	return true;
1950	}
1951
1952	void intel_engine_irq_disable(struct intel_engine_cs *engine)
1953	{
1954	if (!engine->irq_disable)
1955	return;
1956
1957	/ Caller disables interrupts /
1958	spin_lock(lock: engine->gt->irq_lock);
1959	engine->irq_disable(engine);
1960	spin_unlock(lock: engine->gt->irq_lock);
1961	}
1962
1963	void intel_engines_reset_default_submission(struct intel_gt *gt)
1964	{
1965	struct intel_engine_cs *engine;
1966	enum intel_engine_id id;
1967
1968	for_each_engine(engine, gt, id) {
1969	if (engine->sanitize)
1970	engine->sanitize(engine);
1971
1972	engine->set_default_submission(engine);
1973	}
1974	}
1975
1976	bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
1977	{
1978	switch (GRAPHICS_VER(engine->i915)) {
1979	case `2`:
1980	return false; / uses physical not virtual addresses /
1981	case `3`:
1982	/ maybe only uses physical not virtual addresses /
1983	return !(IS_I915G(engine->i915) \|\| IS_I915GM(engine->i915));
1984	case `4`:
1985	return !IS_I965G(engine->i915); / who knows! /
1986	case `6`:
1987	return engine->class != VIDEO_DECODE_CLASS; / b0rked /
1988	default:
1989	return true;
1990	}
1991	}
1992
1993	static struct intel_timeline get_timeline(struct* i915_request *rq)
1994	{
1995	struct intel_timeline *tl;
1996
1997	/*
1998	* Even though we are holding the engine->sched_engine->lock here, there
1999	* is no control over the submission queue per-se and we are
2000	* inspecting the active state at a random point in time, with an
2001	* unknown queue. Play safe and make sure the timeline remains valid.
2002	* (Only being used for pretty printing, one extra kref shouldn't
2003	* cause a camel stampede!)
2004	*/
2005	rcu_read_lock();
2006	tl = rcu_dereference(rq->timeline);
2007	if (!kref_get_unless_zero(kref: &tl->kref))
2008	tl = NULL;
2009	rcu_read_unlock();
2010
2011	return tl;
2012	}
2013
2014	static int print_ring(char buf, int* sz, struct i915_request *rq)
2015	{
2016	int len = `0`;
2017
2018	if (!i915_request_signaled(rq)) {
2019	struct intel_timeline *tl = get_timeline(rq);
2020
2021	len = scnprintf(buf, size: sz,
2022	fmt: "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ",
2023	i915_ggtt_offset(vma: rq->ring->vma),
2024	tl ? tl->hwsp_offset : `0`,
2025	hwsp_seqno(rq),
2026	DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context),
2027	`1000` * `1000`));
2028
2029	if (tl)
2030	intel_timeline_put(timeline: tl);
2031	}
2032
2033	return len;
2034	}
2035
2036	static void hexdump(struct drm_printer m, const* void *buf, size_t len)
2037	{
2038	const size_t rowsize = `8` * sizeof(u32);
2039	const void *prev = NULL;
2040	bool skip = false;
2041	size_t pos;
2042
2043	for (pos = `0`; pos < len; pos += rowsize) {
2044	char line[`128`];
2045
2046	if (prev && !memcmp(p: prev, q: buf + pos, size: rowsize)) {
2047	if (!skip) {
2048	drm_printf(p: m, f: "*\n");
2049	skip = true;
2050	}
2051	continue;
2052	}
2053
2054	WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
2055	rowsize, sizeof(u32),
2056	line, sizeof(line),
2057	false) >= sizeof(line));
2058	drm_printf(p: m, f: "[%04zx] %s\n", pos, line);
2059
2060	prev = buf + pos;
2061	skip = false;
2062	}
2063	}
2064
2065	static const char repr_timer(const* struct timer_list *t)
2066	{
2067	if (!READ_ONCE(t->expires))
2068	return "inactive";
2069
2070	if (timer_pending(timer: t))
2071	return "active";
2072
2073	return "expired";
2074	}
2075
2076	static void intel_engine_print_registers(struct intel_engine_cs *engine,
2077	struct drm_printer *m)
2078	{
2079	struct drm_i915_private *i915 = engine->i915;
2080	struct intel_engine_execlists * const execlists = &engine->execlists;
2081	u64 addr;
2082
2083	if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(i915, `4`, `7`))
2084	drm_printf(p: m, f: "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
2085	if (HAS_EXECLISTS(i915)) {
2086	drm_printf(p: m, f: "\tEL_STAT_HI: 0x%08x\n",
2087	ENGINE_READ(engine, RING_EXECLIST_STATUS_HI));
2088	drm_printf(p: m, f: "\tEL_STAT_LO: 0x%08x\n",
2089	ENGINE_READ(engine, RING_EXECLIST_STATUS_LO));
2090	}
2091	drm_printf(p: m, f: "\tRING_START: 0x%08x\n",
2092	ENGINE_READ(engine, RING_START));
2093	drm_printf(p: m, f: "\tRING_HEAD: 0x%08x\n",
2094	ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
2095	drm_printf(p: m, f: "\tRING_TAIL: 0x%08x\n",
2096	ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
2097	drm_printf(p: m, f: "\tRING_CTL: 0x%08x%s\n",
2098	ENGINE_READ(engine, RING_CTL),
2099	ENGINE_READ(engine, RING_CTL) & (RING_WAIT \| RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
2100	if (GRAPHICS_VER(engine->i915) > `2`) {
2101	drm_printf(p: m, f: "\tRING_MODE: 0x%08x%s\n",
2102	ENGINE_READ(engine, RING_MI_MODE),
2103	ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? " [idle]" : "");
2104	}
2105
2106	if (GRAPHICS_VER(i915) >= `6`) {
2107	drm_printf(p: m, f: "\tRING_IMR: 0x%08x\n",
2108	ENGINE_READ(engine, RING_IMR));
2109	drm_printf(p: m, f: "\tRING_ESR: 0x%08x\n",
2110	ENGINE_READ(engine, RING_ESR));
2111	drm_printf(p: m, f: "\tRING_EMR: 0x%08x\n",
2112	ENGINE_READ(engine, RING_EMR));
2113	drm_printf(p: m, f: "\tRING_EIR: 0x%08x\n",
2114	ENGINE_READ(engine, RING_EIR));
2115	}
2116
2117	addr = intel_engine_get_active_head(engine);
2118	drm_printf(p: m, f: "\tACTHD: 0x%08x_%08x\n",
2119	upper_32_bits(addr), lower_32_bits(addr));
2120	addr = intel_engine_get_last_batch_head(engine);
2121	drm_printf(p: m, f: "\tBBADDR: 0x%08x_%08x\n",
2122	upper_32_bits(addr), lower_32_bits(addr));
2123	if (GRAPHICS_VER(i915) >= `8`)
2124	addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
2125	else if (GRAPHICS_VER(i915) >= `4`)
2126	addr = ENGINE_READ(engine, RING_DMA_FADD);
2127	else
2128	addr = ENGINE_READ(engine, DMA_FADD_I8XX);
2129	drm_printf(p: m, f: "\tDMA_FADDR: 0x%08x_%08x\n",
2130	upper_32_bits(addr), lower_32_bits(addr));
2131	if (GRAPHICS_VER(i915) >= `4`) {
2132	drm_printf(p: m, f: "\tIPEIR: 0x%08x\n",
2133	ENGINE_READ(engine, RING_IPEIR));
2134	drm_printf(p: m, f: "\tIPEHR: 0x%08x\n",
2135	ENGINE_READ(engine, RING_IPEHR));
2136	} else {
2137	drm_printf(p: m, f: "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
2138	drm_printf(p: m, f: "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
2139	}
2140
2141	if (HAS_EXECLISTS(i915) && !intel_engine_uses_guc(engine)) {
2142	struct i915_request * const port, rq;
2143	const u32 *hws =
2144	&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
2145	const u8 num_entries = execlists->csb_size;
2146	unsigned int idx;
2147	u8 read, write;
2148
2149	drm_printf(p: m, f: "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n",
2150	str_yes_no(test_bit(TASKLET_STATE_SCHED, &engine->sched_engine->tasklet.state)),
2151	str_enabled_disabled(v: !atomic_read(v: &engine->sched_engine->tasklet.count)),
2152	repr_timer(t: &engine->execlists.preempt),
2153	repr_timer(t: &engine->execlists.timer));
2154
2155	read = execlists->csb_head;
2156	write = READ_ONCE(*execlists->csb_write);
2157
2158	drm_printf(p: m, f: "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n",
2159	ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
2160	ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
2161	read, write, num_entries);
2162
2163	if (read >= num_entries)
2164	read = `0`;
2165	if (write >= num_entries)
2166	write = `0`;
2167	if (read > write)
2168	write += num_entries;
2169	while (read < write) {
2170	idx = ++read % num_entries;
2171	drm_printf(p: m, f: "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
2172	idx, hws[idx * `2`], hws[idx * `2` + `1`]);
2173	}
2174
2175	i915_sched_engine_active_lock_bh(sched_engine: engine->sched_engine);
2176	rcu_read_lock();
2177	for (port = execlists->active; (rq = *port); port++) {
2178	char hdr[`160`];
2179	int len;
2180
2181	len = scnprintf(buf: hdr, size: sizeof(hdr),
2182	fmt: "\t\tActive[%d]: ccid:%08x%s%s, ",
2183	(int)(port - execlists->active),
2184	rq->context->lrc.ccid,
2185	intel_context_is_closed(ce: rq->context) ? "!" : "",
2186	intel_context_is_banned(ce: rq->context) ? "*" : "");
2187	len += print_ring(buf: hdr + len, sz: sizeof(hdr) - len, rq);
2188	scnprintf(buf: hdr + len, size: sizeof(hdr) - len, fmt: "rq: ");
2189	i915_request_show(m, rq, prefix: hdr, indent: `0`);
2190	}
2191	for (port = execlists->pending; (rq = *port); port++) {
2192	char hdr[`160`];
2193	int len;
2194
2195	len = scnprintf(buf: hdr, size: sizeof(hdr),
2196	fmt: "\t\tPending[%d]: ccid:%08x%s%s, ",
2197	(int)(port - execlists->pending),
2198	rq->context->lrc.ccid,
2199	intel_context_is_closed(ce: rq->context) ? "!" : "",
2200	intel_context_is_banned(ce: rq->context) ? "*" : "");
2201	len += print_ring(buf: hdr + len, sz: sizeof(hdr) - len, rq);
2202	scnprintf(buf: hdr + len, size: sizeof(hdr) - len, fmt: "rq: ");
2203	i915_request_show(m, rq, prefix: hdr, indent: `0`);
2204	}
2205	rcu_read_unlock();
2206	i915_sched_engine_active_unlock_bh(sched_engine: engine->sched_engine);
2207	} else if (GRAPHICS_VER(i915) > `6`) {
2208	drm_printf(p: m, f: "\tPP_DIR_BASE: 0x%08x\n",
2209	ENGINE_READ(engine, RING_PP_DIR_BASE));
2210	drm_printf(p: m, f: "\tPP_DIR_BASE_READ: 0x%08x\n",
2211	ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
2212	drm_printf(p: m, f: "\tPP_DIR_DCLV: 0x%08x\n",
2213	ENGINE_READ(engine, RING_PP_DIR_DCLV));
2214	}
2215	}
2216
2217	static void print_request_ring(struct drm_printer m, struct* i915_request *rq)
2218	{
2219	struct i915_vma_resource *vma_res = rq->batch_res;
2220	void *ring;
2221	int size;
2222
2223	drm_printf(p: m,
2224	f: "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
2225	rq->head, rq->postfix, rq->tail,
2226	vma_res ? upper_32_bits(vma_res->start) : ~`0u`,
2227	vma_res ? lower_32_bits(vma_res->start) : ~`0u`);
2228
2229	size = rq->tail - rq->head;
2230	if (rq->tail < rq->head)
2231	size += rq->ring->size;
2232
2233	ring = kmalloc(size, GFP_ATOMIC);
2234	if (ring) {
2235	const void *vaddr = rq->ring->vaddr;
2236	unsigned int head = rq->head;
2237	unsigned int len = `0`;
2238
2239	if (rq->tail < head) {
2240	len = rq->ring->size - head;
2241	memcpy(ring, vaddr + head, len);
2242	head = `0`;
2243	}
2244	memcpy(ring + len, vaddr + head, size - len);
2245
2246	hexdump(m, buf: ring, len: size);
2247	kfree(objp: ring);
2248	}
2249	}
2250
/* Load the unsigned long stored @x bytes past @p. */
static unsigned long read_ul(void *p, size_t x)
{
	return *(unsigned long *)((char *)p + x);
}
2255
2256	static void print_properties(struct intel_engine_cs *engine,
2257	struct drm_printer *m)
2258	{
2259	static const struct pmap {
2260	size_t offset;
2261	const char *name;
2262	} props[] = {
2263	#define P(x) { \
2264	.offset = offsetof(typeof(engine->props), x), \
2265	.name = #x \
2266	}
2267	P(heartbeat_interval_ms),
2268	P(max_busywait_duration_ns),
2269	P(preempt_timeout_ms),
2270	P(stop_timeout_ms),
2271	P(timeslice_duration_ms),
2272
2273	{},
2274	#undef P
2275	};
2276	const struct pmap *p;
2277
2278	drm_printf(p: m, f: "\tProperties:\n");
2279	for (p = props; p->name; p++)
2280	drm_printf(p: m, f: "\t\t%s: %lu [default %lu]\n",
2281	p->name,
2282	read_ul(p: &engine->props, x: p->offset),
2283	read_ul(p: &engine->defaults, x: p->offset));
2284	}
2285
2286	static void engine_dump_request(struct i915_request rq, struct* drm_printer m, const* char *msg)
2287	{
2288	struct intel_timeline *tl = get_timeline(rq);
2289
2290	i915_request_show(m, rq, prefix: msg, indent: `0`);
2291
2292	drm_printf(p: m, f: "\t\tring->start: 0x%08x\n",
2293	i915_ggtt_offset(vma: rq->ring->vma));
2294	drm_printf(p: m, f: "\t\tring->head: 0x%08x\n",
2295	rq->ring->head);
2296	drm_printf(p: m, f: "\t\tring->tail: 0x%08x\n",
2297	rq->ring->tail);
2298	drm_printf(p: m, f: "\t\tring->emit: 0x%08x\n",
2299	rq->ring->emit);
2300	drm_printf(p: m, f: "\t\tring->space: 0x%08x\n",
2301	rq->ring->space);
2302
2303	if (tl) {
2304	drm_printf(p: m, f: "\t\tring->hwsp: 0x%08x\n",
2305	tl->hwsp_offset);
2306	intel_timeline_put(timeline: tl);
2307	}
2308
2309	print_request_ring(m, rq);
2310
2311	if (rq->context->lrc_reg_state) {
2312	drm_printf(p: m, f: "Logical Ring Context:\n");
2313	hexdump(m, buf: rq->context->lrc_reg_state, PAGE_SIZE);
2314	}
2315	}
2316
2317	void intel_engine_dump_active_requests(struct list_head *requests,
2318	struct i915_request *hung_rq,
2319	struct drm_printer *m)
2320	{
2321	struct i915_request *rq;
2322	const char *msg;
2323	enum i915_request_state state;
2324
2325	list_for_each_entry(rq, requests, sched.link) {
2326	if (rq == hung_rq)
2327	continue;
2328
2329	state = i915_test_request_state(rq);
2330	if (state < I915_REQUEST_QUEUED)
2331	continue;
2332
2333	if (state == I915_REQUEST_ACTIVE)
2334	msg = "\t\tactive on engine";
2335	else
2336	msg = "\t\tactive in queue";
2337
2338	engine_dump_request(rq, m, msg);
2339	}
2340	}
2341
2342	static void engine_dump_active_requests(struct intel_engine_cs *engine,
2343	struct drm_printer *m)
2344	{
2345	struct intel_context *hung_ce = NULL;
2346	struct i915_request *hung_rq = NULL;
2347
2348	/*
2349	* No need for an engine->irq_seqno_barrier() before the seqno reads.
2350	* The GPU is still running so requests are still executing and any
2351	* hardware reads will be out of date by the time they are reported.
2352	* But the intention here is just to report an instantaneous snapshot
2353	* so that's fine.
2354	*/
2355	intel_engine_get_hung_entity(engine, ce: &hung_ce, rq: &hung_rq);
2356
2357	drm_printf(p: m, f: "\tRequests:\n");
2358
2359	if (hung_rq)
2360	engine_dump_request(rq: hung_rq, m, msg: "\t\thung");
2361	else if (hung_ce)
2362	drm_printf(p: m, f: "\t\tGot hung ce but no hung rq!\n");
2363
2364	if (intel_uc_uses_guc_submission(uc: &engine->gt->uc))
2365	intel_guc_dump_active_requests(engine, hung_rq, m);
2366	else
2367	intel_execlists_dump_active_requests(engine, hung_rq, m);
2368
2369	if (hung_rq)
2370	i915_request_put(rq: hung_rq);
2371	}
2372
2373	void intel_engine_dump(struct intel_engine_cs *engine,
2374	struct drm_printer *m,
2375	const char *header, ...)
2376	{
2377	struct i915_gpu_error * const error = &engine->i915->gpu_error;
2378	struct i915_request *rq;
2379	intel_wakeref_t wakeref;
2380	ktime_t dummy;
2381
2382	if (header) {
2383	va_list ap;
2384
2385	va_start(ap, header);
2386	drm_vprintf(p: m, fmt: header, va: &ap);
2387	va_end(ap);
2388	}
2389
2390	if (intel_gt_is_wedged(gt: engine->gt))
2391	drm_printf(p: m, f: "* WEDGED *\n");
2392
2393	drm_printf(p: m, f: "\tAwake? %d\n", atomic_read(v: &engine->wakeref.count));
2394	drm_printf(p: m, f: "\tBarriers?: %s\n",
2395	str_yes_no(v: !llist_empty(head: &engine->barrier_tasks)));
2396	drm_printf(p: m, f: "\tLatency: %luus\n",
2397	ewma__engine_latency_read(e: &engine->latency));
2398	if (intel_engine_supports_stats(engine))
2399	drm_printf(p: m, f: "\tRuntime: %llums\n",
2400	ktime_to_ms(kt: intel_engine_get_busy_time(engine,
2401	now: &dummy)));
2402	drm_printf(p: m, f: "\tForcewake: %x domains, %d active\n",
2403	engine->fw_domain, READ_ONCE(engine->fw_active));
2404
2405	rcu_read_lock();
2406	rq = READ_ONCE(engine->heartbeat.systole);
2407	if (rq)
2408	drm_printf(p: m, f: "\tHeartbeat: %d ms ago\n",
2409	jiffies_to_msecs(j: jiffies - rq->emitted_jiffies));
2410	rcu_read_unlock();
2411	drm_printf(p: m, f: "\tReset count: %d (global %d)\n",
2412	i915_reset_engine_count(error, engine),
2413	i915_reset_count(error));
2414	print_properties(engine, m);
2415
2416	engine_dump_active_requests(engine, m);
2417
2418	drm_printf(p: m, f: "\tMMIO base: 0x%08x\n", engine->mmio_base);
2419	wakeref = intel_runtime_pm_get_if_in_use(rpm: engine->uncore->rpm);
2420	if (wakeref) {
2421	intel_engine_print_registers(engine, m);
2422	intel_runtime_pm_put(rpm: engine->uncore->rpm, wref: wakeref);
2423	} else {
2424	drm_printf(p: m, f: "\tDevice is asleep; skipping register dump\n");
2425	}
2426
2427	intel_execlists_show_requests(engine, m, show_request: i915_request_show, max: `8`);
2428
2429	drm_printf(p: m, f: "HWSP:\n");
2430	hexdump(m, buf: engine->status_page.addr, PAGE_SIZE);
2431
2432	drm_printf(p: m, f: "Idle? %s\n", str_yes_no(v: intel_engine_is_idle(engine)));
2433
2434	intel_engine_print_breadcrumbs(engine, p: m);
2435	}
2436
2437	/**
2438	* intel_engine_get_busy_time() - Return current accumulated engine busyness
2439	* @engine: engine to report on
2440	* @now: monotonic timestamp of sampling
2441	*
2442	* Returns accumulated time @engine was busy since engine stats were enabled.
2443	*/
2444	ktime_t intel_engine_get_busy_time(struct intel_engine_cs engine, ktime_t now)
2445	{
2446	return engine->busyness(engine, now);
2447	}
2448
2449	struct intel_context *
2450	intel_engine_create_virtual(struct intel_engine_cs **siblings,
2451	unsigned int count, unsigned long flags)
2452	{
2453	if (count == `0`)
2454	return ERR_PTR(error: -EINVAL);
2455
2456	if (count == `1` && !(flags & FORCE_VIRTUAL))
2457	return intel_context_create(engine: siblings[`0`]);
2458
2459	GEM_BUG_ON(!siblings[`0`]->cops->create_virtual);
2460	return siblings[`0`]->cops->create_virtual(siblings, count, flags);
2461	}
2462
2463	static struct i915_request engine_execlist_find_hung_request(struct* intel_engine_cs *engine)
2464	{
2465	struct i915_request request, active = NULL;
2466
2467	/*
2468	* This search does not work in GuC submission mode. However, the GuC
2469	* will report the hanging context directly to the driver itself. So
2470	* the driver should never get here when in GuC mode.
2471	*/
2472	GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc));
2473
2474	/*
2475	* We are called by the error capture, reset and to dump engine
2476	* state at random points in time. In particular, note that neither is
2477	* crucially ordered with an interrupt. After a hang, the GPU is dead
2478	* and we assume that no more writes can happen (we waited long enough
2479	* for all writes that were in transaction to be flushed) - adding an
2480	* extra delay for a recent interrupt is pointless. Hence, we do
2481	* not need an engine->irq_seqno_barrier() before the seqno reads.
2482	* At all other times, we must assume the GPU is still running, but
2483	* we only care about the snapshot of this moment.
2484	*/
2485	lockdep_assert_held(&engine->sched_engine->lock);
2486
2487	rcu_read_lock();
2488	request = execlists_active(execlists: &engine->execlists);
2489	if (request) {
2490	struct intel_timeline *tl = request->context->timeline;
2491
2492	list_for_each_entry_from_reverse(request, &tl->requests, link) {
2493	if (__i915_request_is_complete(rq: request))
2494	break;
2495
2496	active = request;
2497	}
2498	}
2499	rcu_read_unlock();
2500	if (active)
2501	return active;
2502
2503	list_for_each_entry(request, &engine->sched_engine->requests,
2504	sched.link) {
2505	if (i915_test_request_state(rq: request) != I915_REQUEST_ACTIVE)
2506	continue;
2507
2508	active = request;
2509	break;
2510	}
2511
2512	return active;
2513	}
2514
2515	void intel_engine_get_hung_entity(struct intel_engine_cs *engine,
2516	struct intel_context ce, struct i915_request rq)
2517	{
2518	unsigned long flags;
2519
2520	*ce = intel_engine_get_hung_context(engine);
2521	if (*ce) {
2522	intel_engine_clear_hung_context(engine);
2523
2524	rq = intel_context_get_active_request(ce: ce);
2525	return;
2526	}
2527
2528	/*
2529	* Getting here with GuC enabled means it is a forced error capture
2530	* with no actual hang. So, no need to attempt the execlist search.
2531	*/
2532	if (intel_uc_uses_guc_submission(uc: &engine->gt->uc))
2533	return;
2534
2535	spin_lock_irqsave(&engine->sched_engine->lock, flags);
2536	*rq = engine_execlist_find_hung_request(engine);
2537	if (*rq)
2538	rq = i915_request_get_rcu(rq: rq);
2539	spin_unlock_irqrestore(lock: &engine->sched_engine->lock, flags);
2540	}
2541
2542	void xehp_enable_ccs_engines(struct intel_engine_cs *engine)
2543	{
2544	/*
2545	* If there are any non-fused-off CCS engines, we need to enable CCS
2546	* support in the RCU_MODE register. This only needs to be done once,
2547	* so for simplicity we'll take care of this in the RCS engine's
2548	* resume handler; since the RCS and all CCS engines belong to the
2549	* same reset domain and are reset together, this will also take care
2550	* of re-applying the setting after i915-triggered resets.
2551	*/
2552	if (!CCS_MASK(engine->gt))
2553	return;
2554
2555	intel_uncore_write(uncore: engine->uncore, GEN12_RCU_MODE,
2556	_MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE));
2557	}
2558
2559	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
2560	#include "mock_engine.c"
2561	#include "selftest_engine.c"
2562	#include "selftest_engine_cs.c"
2563	#endif
2564

/* source code of linux/drivers/gpu/drm/i915/gt/intel_engine_cs.c */