// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include "gen2_engine_cs.h"
#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_engine.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_irq.h"
#include "intel_ring.h"

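/*
 * MI_FLUSH on gen2/3 only kicks off the flush; the dozen dummy stores to
 * the HWSP scratch slot, interleaved with further (no-write) flushes,
 * presumably act as a delay so that the flush and any invalidation have
 * landed before the next command executes (compare the gen4 comment below).
 */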
int gen2_emit_flush(struct i915_request *rq, u32 mode)
{
	unsigned int num_store_dw = 12;
	u32 cmd, *cs;

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE)
		cmd |= MI_READ_FLUSH;

	cs = intel_ring_begin(rq, 2 + 4 * num_store_dw);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;
	while (num_store_dw--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = 0;
		*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
	}
	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

int gen4_emit_flush_rcs(struct i915_request *rq, u32 mode)
{
	u32 cmd, *cs;
	int i;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH;
	if (mode & EMIT_INVALIDATE) {
		cmd |= MI_EXE_FLUSH;
		if (IS_G4X(rq->i915) || GRAPHICS_VER(rq->i915) == 5)
			cmd |= MI_INVALIDATE_ISP;
	}

	i = 2;
	if (mode & EMIT_INVALIDATE)
		i += 20;

	cs = intel_ring_begin(rq, i);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = cmd;

	/*
	 * A random delay to let the CS invalidate take effect? Without this
	 * delay, the GPU relocation path fails as the CS does not see
	 * the updated contents. Just as important, if we apply the flushes
	 * to the EMIT_FLUSH branch (i.e. immediately after the relocation
	 * write and before the invalidate on the next batch), the relocations
	 * still fail. This implies that there is a delay following invalidation
	 * that is required to reset the caches as opposed to a delay to
	 * ensure the memory is written.
	 */
	if (mode & EMIT_INVALIDATE) {
		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;

		for (i = 0; i < 12; i++)
			*cs++ = MI_FLUSH;

		*cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
		*cs++ = intel_gt_scratch_offset(rq->engine->gt,
						INTEL_GT_SCRATCH_FIELD_DEFAULT) |
			PIPE_CONTROL_GLOBAL_GTT;
		*cs++ = 0;
		*cs++ = 0;
	}

	*cs++ = cmd;

	intel_ring_advance(rq, cs);

	return 0;
}

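/*
 * The BSD ring here appears to need only a bare MI_FLUSH; the trailing
 * MI_NOOP keeps the emitted sequence an even number of dwords, matching
 * the qword alignment the ring tail requires.
 */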
int gen4_emit_flush_vcs(struct i915_request *rq, u32 mode)
{
	u32 *cs;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_FLUSH;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	return 0;
}

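/*
 * Write the breadcrumb: 'flush' throwaway stores to the scratch slot
 * (presumably serving as both flush and delay, as in the flush paths
 * above), then 'post' copies of the seqno into the status page, and
 * finally the user interrupt to signal the CPU.
 */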
static u32 *__gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs,
				   int flush, int post)
{
	GEM_BUG_ON(i915_request_active_timeline(rq)->hwsp_ggtt != rq->engine->status_page.vma);
	GEM_BUG_ON(offset_in_page(rq->hwsp_seqno) != I915_GEM_HWS_SEQNO_ADDR);

	*cs++ = MI_FLUSH;

	while (flush--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SCRATCH * sizeof(u32);
		*cs++ = rq->fence.seqno;
	}

	while (post--) {
		*cs++ = MI_STORE_DWORD_INDEX;
		*cs++ = I915_GEM_HWS_SEQNO_ADDR;
		*cs++ = rq->fence.seqno;
	}

	*cs++ = MI_USER_INTERRUPT;

	rq->tail = intel_ring_offset(rq, cs);
	assert_ring_tail_valid(rq->ring, rq->tail);

	return cs;
}

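/*
 * The flush/post counts look empirically chosen; gen5 seems to get away
 * with fewer scratch writes than gen2/3 before the seqno write becomes
 * reliably visible.
 */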
u32 *gen2_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 16, 8);
}

u32 *gen5_emit_breadcrumb(struct i915_request *rq, u32 *cs)
{
	return __gen2_emit_breadcrumb(rq, cs, 8, 8);
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT SZ_256K
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES * SZ_4K, I830_BATCH_LIMIT)
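/*
 * i830 has a hardware bug whereby the CS may sample stale TLB entries for
 * the batch. As a workaround, first touch I830_TLB_ENTRIES pages of the
 * scratch area with a blit to evict the stale entries, then (unless the
 * caller pinned the batch) blit the batch itself into the scratch area
 * and execute that stable copy instead.
 */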
int i830_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs, cs_offset =
		intel_gt_scratch_offset(rq->engine->gt,
					INTEL_GT_SCRATCH_FIELD_DEFAULT);

	GEM_BUG_ON(rq->engine->gt->scratch->size < I830_WA_SIZE);

	cs = intel_ring_begin(rq, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	/* Evict the invalid PTE TLBs */
	*cs++ = COLOR_BLT_CMD | BLT_WRITE_RGBA;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096;
	*cs++ = I830_TLB_ENTRIES << 16 | 4; /* load each page */
	*cs++ = cs_offset;
	*cs++ = 0xdeadbeef;
	*cs++ = MI_NOOP;
	intel_ring_advance(rq, cs);

	if ((dispatch_flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		cs = intel_ring_begin(rq, 6 + 2);
		if (IS_ERR(cs))
			return PTR_ERR(cs);

		/*
		 * Blit the batch (which now has all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		*cs++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096;
		*cs++ = DIV_ROUND_UP(len, 4096) << 16 | 4096;
		*cs++ = cs_offset;
		*cs++ = 4096;
		*cs++ = offset;

		*cs++ = MI_FLUSH;
		*cs++ = MI_NOOP;
		intel_ring_advance(rq, cs);

		/* ... and execute it. */
		offset = cs_offset;
	}

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

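/*
 * On gen2/3 the non-secure bit lives in the batch address dword itself
 * rather than in the MI_BATCH_BUFFER_START command dword.
 */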
int gen2_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 len,
		       unsigned int dispatch_flags)
{
	u32 *cs;

	if (!(dispatch_flags & I915_DISPATCH_SECURE))
		offset |= MI_BATCH_NON_SECURE;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

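/*
 * From gen4 onwards the security bit moved into the MI_BATCH_BUFFER_START
 * command dword (MI_BATCH_NON_SECURE_I965), so the batch address is used
 * unmodified.
 */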
int gen4_emit_bb_start(struct i915_request *rq,
		       u64 offset, u32 length,
		       unsigned int dispatch_flags)
{
	u32 security;
	u32 *cs;

	security = MI_BATCH_NON_SECURE_I965;
	if (dispatch_flags & I915_DISPATCH_SECURE)
		security = 0;

	cs = intel_ring_begin(rq, 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT | security;
	*cs++ = offset;
	intel_ring_advance(rq, cs);

	return 0;
}

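/*
 * gen2 exposes a single interrupt mask register shared by all engines;
 * these helpers update the cached mask (presumably serialised by the
 * caller's irq lock) and, on enable, do a posting read to flush the
 * write before any subsequent wait.
 */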
void gen2_irq_enable(struct intel_engine_cs *engine)
{
	engine->i915->gen2_imr_mask &= ~engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask);
	intel_uncore_posting_read_fw(engine->uncore, GEN2_IMR);
}

void gen2_irq_disable(struct intel_engine_cs *engine)
{
	engine->i915->gen2_imr_mask |= engine->irq_enable_mask;
	intel_uncore_write(engine->uncore, GEN2_IMR, engine->i915->gen2_imr_mask);
}

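/*
 * gen5 (Ironlake) routes engine interrupts through the GT interrupt
 * registers, so simply defer to the shared GT helpers.
 */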
void gen5_irq_enable(struct intel_engine_cs *engine)
{
	gen5_gt_enable_irq(engine->gt, engine->irq_enable_mask);
}

void gen5_irq_disable(struct intel_engine_cs *engine)
{
	gen5_gt_disable_irq(engine->gt, engine->irq_enable_mask);
}