1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2021 Intel Corporation
4 */
5
6#include "xe_execlist.h"
7
8#include <drm/drm_managed.h>
9
10#include "instructions/xe_mi_commands.h"
11#include "regs/xe_engine_regs.h"
12#include "regs/xe_gt_regs.h"
13#include "regs/xe_lrc_layout.h"
14#include "xe_assert.h"
15#include "xe_bo.h"
16#include "xe_device.h"
17#include "xe_exec_queue.h"
18#include "xe_gt.h"
19#include "xe_hw_fence.h"
20#include "xe_irq.h"
21#include "xe_lrc.h"
22#include "xe_macros.h"
23#include "xe_mmio.h"
24#include "xe_mocs.h"
25#include "xe_ring_ops_types.h"
26#include "xe_sched_job.h"
27
/* NOTE(review): not referenced in the visible part of this file. */
#define XE_EXECLIST_HANG_LIMIT 1

/*
 * Software context ID field of the LRC descriptor.
 * Used by __start_lrc() when GRAPHICS_VERx100() is below 1250.
 */
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_SHIFT + SW_CTX_ID_WIDTH - 1, \
		    SW_CTX_ID_SHIFT)

/*
 * Software context ID field of the LRC descriptor.
 * Used by __start_lrc() when GRAPHICS_VERx100() is 1250 or above.
 */
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16
#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_SHIFT + XEHP_SW_CTX_ID_WIDTH - 1, \
		    XEHP_SW_CTX_ID_SHIFT)
42
43
44static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
45 u32 ctx_id)
46{
47 struct xe_gt *gt = hwe->gt;
48 struct xe_mmio *mmio = &gt->mmio;
49 struct xe_device *xe = gt_to_xe(gt);
50 u64 lrc_desc;
51 u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
52
53 lrc_desc = xe_lrc_descriptor(lrc);
54
55 if (GRAPHICS_VERx100(xe) >= 1250) {
56 xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
57 lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
58 } else {
59 xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
60 lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
61 }
62
63 if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
64 xe_mmio_write32(mmio, RCU_MODE,
65 _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
66
67 xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, val: lrc->ring.tail);
68 lrc->ring.old_tail = lrc->ring.tail;
69
70 /*
71 * Make sure the context image is complete before we submit it to HW.
72 *
73 * Ostensibly, writes (including the WCB) should be flushed prior to
74 * an uncached write such as our mmio register access, the empirical
75 * evidence (esp. on Braswell) suggests that the WC write into memory
76 * may not be visible to the HW prior to the completion of the UC
77 * register write and that we may begin execution from the context
78 * before its image is complete leading to invalid PD chasing.
79 */
80 wmb();
81
82 xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
83 val: xe_bo_ggtt_addr(bo: hwe->hwsp));
84 xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));
85
86 if (xe_device_has_msix(gt_to_xe(hwe->gt)))
87 ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
88 xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), val: ring_mode);
89
90 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
91 lower_32_bits(lrc_desc));
92 xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
93 upper_32_bits(lrc_desc));
94 xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
95 EL_CTRL_LOAD);
96}
97
98static void __xe_execlist_port_start(struct xe_execlist_port *port,
99 struct xe_execlist_exec_queue *exl)
100{
101 struct xe_device *xe = gt_to_xe(port->hwe->gt);
102 int max_ctx = FIELD_MAX(SW_CTX_ID);
103
104 if (GRAPHICS_VERx100(xe) >= 1250)
105 max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);
106
107 xe_execlist_port_assert_held(port);
108
109 if (port->running_exl != exl || !exl->has_run) {
110 port->last_ctx_id++;
111
112 /* 0 is reserved for the kernel context */
113 if (port->last_ctx_id > max_ctx)
114 port->last_ctx_id = 1;
115 }
116
117 __start_lrc(hwe: port->hwe, lrc: exl->q->lrc[0], ctx_id: port->last_ctx_id);
118 port->running_exl = exl;
119 exl->has_run = true;
120}
121
122static void __xe_execlist_port_idle(struct xe_execlist_port *port)
123{
124 u32 noop[2] = { MI_NOOP, MI_NOOP };
125
126 xe_execlist_port_assert_held(port);
127
128 if (!port->running_exl)
129 return;
130
131 xe_lrc_write_ring(lrc: port->lrc, data: noop, size: sizeof(noop));
132 __start_lrc(hwe: port->hwe, lrc: port->lrc, ctx_id: 0);
133 port->running_exl = NULL;
134}
135
136static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
137{
138 struct xe_lrc *lrc = exl->q->lrc[0];
139
140 return lrc->ring.tail == lrc->ring.old_tail;
141}
142
143static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
144{
145 struct xe_execlist_exec_queue *exl = NULL;
146 int i;
147
148 xe_execlist_port_assert_held(port);
149
150 for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
151 while (!list_empty(head: &port->active[i])) {
152 exl = list_first_entry(&port->active[i],
153 struct xe_execlist_exec_queue,
154 active_link);
155 list_del(entry: &exl->active_link);
156
157 if (xe_execlist_is_idle(exl)) {
158 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
159 continue;
160 }
161
162 list_add_tail(new: &exl->active_link, head: &port->active[i]);
163 __xe_execlist_port_start(port, exl);
164 return;
165 }
166 }
167
168 __xe_execlist_port_idle(port);
169}
170
171static u64 read_execlist_status(struct xe_hw_engine *hwe)
172{
173 struct xe_gt *gt = hwe->gt;
174 u32 hi, lo;
175
176 lo = xe_mmio_read32(mmio: &gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
177 hi = xe_mmio_read32(mmio: &gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));
178
179 return lo | (u64)hi << 32;
180}
181
182static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
183{
184 u64 status;
185
186 xe_execlist_port_assert_held(port);
187
188 status = read_execlist_status(hwe: port->hwe);
189 if (status & BIT(7))
190 return;
191
192 __xe_execlist_port_start_next_active(port);
193}
194
195static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
196 u16 intr_vec)
197{
198 struct xe_execlist_port *port = hwe->exl_port;
199
200 spin_lock(lock: &port->lock);
201 xe_execlist_port_irq_handler_locked(port);
202 spin_unlock(lock: &port->lock);
203}
204
205static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
206 enum xe_exec_queue_priority priority)
207{
208 xe_execlist_port_assert_held(port);
209
210 if (port->running_exl && port->running_exl->active_priority >= priority)
211 return;
212
213 __xe_execlist_port_start_next_active(port);
214}
215
216static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
217{
218 struct xe_execlist_port *port = exl->port;
219 enum xe_exec_queue_priority priority = exl->q->sched_props.priority;
220
221 XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
222 XE_WARN_ON(priority < 0);
223 XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));
224
225 spin_lock_irq(lock: &port->lock);
226
227 if (exl->active_priority != priority &&
228 exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
229 /* Priority changed, move it to the right list */
230 list_del(entry: &exl->active_link);
231 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
232 }
233
234 if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
235 exl->active_priority = priority;
236 list_add_tail(new: &exl->active_link, head: &port->active[priority]);
237 }
238
239 xe_execlist_port_wake_locked(port: exl->port, priority);
240
241 spin_unlock_irq(lock: &port->lock);
242}
243
244static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
245{
246 struct xe_execlist_port *port =
247 container_of(timer, struct xe_execlist_port, irq_fail);
248
249 spin_lock_irq(lock: &port->lock);
250 xe_execlist_port_irq_handler_locked(port);
251 spin_unlock_irq(lock: &port->lock);
252
253 port->irq_fail.expires = jiffies + msecs_to_jiffies(m: 1000);
254 add_timer(timer: &port->irq_fail);
255}
256
257struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
258 struct xe_hw_engine *hwe)
259{
260 struct drm_device *drm = &xe->drm;
261 struct xe_execlist_port *port;
262 int i, err;
263
264 port = drmm_kzalloc(dev: drm, size: sizeof(*port), GFP_KERNEL);
265 if (!port) {
266 err = -ENOMEM;
267 goto err;
268 }
269
270 port->hwe = hwe;
271
272 port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, flags: 0);
273 if (IS_ERR(ptr: port->lrc)) {
274 err = PTR_ERR(ptr: port->lrc);
275 goto err;
276 }
277
278 spin_lock_init(&port->lock);
279 for (i = 0; i < ARRAY_SIZE(port->active); i++)
280 INIT_LIST_HEAD(list: &port->active[i]);
281
282 port->last_ctx_id = 1;
283 port->running_exl = NULL;
284
285 hwe->irq_handler = xe_execlist_port_irq_handler;
286
287 /* TODO: Fix the interrupt code so it doesn't race like mad */
288 timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
289 port->irq_fail.expires = jiffies + msecs_to_jiffies(m: 1000);
290 add_timer(timer: &port->irq_fail);
291
292 return port;
293
294err:
295 return ERR_PTR(error: err);
296}
297
298void xe_execlist_port_destroy(struct xe_execlist_port *port)
299{
300 timer_delete(timer: &port->irq_fail);
301
302 /* Prevent an interrupt while we're destroying */
303 spin_lock_irq(lock: &gt_to_xe(port->hwe->gt)->irq.lock);
304 port->hwe->irq_handler = NULL;
305 spin_unlock_irq(lock: &gt_to_xe(port->hwe->gt)->irq.lock);
306
307 xe_lrc_put(lrc: port->lrc);
308}
309
310static struct dma_fence *
311execlist_run_job(struct drm_sched_job *drm_job)
312{
313 struct xe_sched_job *job = to_xe_sched_job(drm: drm_job);
314 struct xe_exec_queue *q = job->q;
315 struct xe_execlist_exec_queue *exl = job->q->execlist;
316
317 q->ring_ops->emit_job(job);
318 xe_execlist_make_active(exl);
319
320 return job->fence;
321}
322
323static void execlist_job_free(struct drm_sched_job *drm_job)
324{
325 struct xe_sched_job *job = to_xe_sched_job(drm: drm_job);
326
327 xe_exec_queue_update_run_ticks(q: job->q);
328 xe_sched_job_put(job);
329}
330
331static const struct drm_sched_backend_ops drm_sched_ops = {
332 .run_job = execlist_run_job,
333 .free_job = execlist_job_free,
334};
335
336static int execlist_exec_queue_init(struct xe_exec_queue *q)
337{
338 struct drm_gpu_scheduler *sched;
339 const struct drm_sched_init_args args = {
340 .ops = &drm_sched_ops,
341 .num_rqs = 1,
342 .credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
343 .hang_limit = XE_SCHED_HANG_LIMIT,
344 .timeout = XE_SCHED_JOB_TIMEOUT,
345 .name = q->hwe->name,
346 .dev = gt_to_xe(q->gt)->drm.dev,
347 };
348 struct xe_execlist_exec_queue *exl;
349 struct xe_device *xe = gt_to_xe(q->gt);
350 int err;
351
352 xe_assert(xe, !xe_device_uc_enabled(xe));
353
354 drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");
355
356 exl = kzalloc(sizeof(*exl), GFP_KERNEL);
357 if (!exl)
358 return -ENOMEM;
359
360 exl->q = q;
361
362 err = drm_sched_init(sched: &exl->sched, args: &args);
363 if (err)
364 goto err_free;
365
366 sched = &exl->sched;
367 err = drm_sched_entity_init(entity: &exl->entity, priority: 0, sched_list: &sched, num_sched_list: 1, NULL);
368 if (err)
369 goto err_sched;
370
371 exl->port = q->hwe->exl_port;
372 exl->has_run = false;
373 exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
374 q->execlist = exl;
375 q->entity = &exl->entity;
376
377 xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);
378
379 return 0;
380
381err_sched:
382 drm_sched_fini(sched: &exl->sched);
383err_free:
384 kfree(objp: exl);
385 return err;
386}
387
388static void execlist_exec_queue_fini(struct xe_exec_queue *q)
389{
390 struct xe_execlist_exec_queue *exl = q->execlist;
391
392 drm_sched_entity_fini(entity: &exl->entity);
393 drm_sched_fini(sched: &exl->sched);
394
395 kfree(objp: exl);
396}
397
398static void execlist_exec_queue_destroy_async(struct work_struct *w)
399{
400 struct xe_execlist_exec_queue *ee =
401 container_of(w, struct xe_execlist_exec_queue, destroy_async);
402 struct xe_exec_queue *q = ee->q;
403 struct xe_execlist_exec_queue *exl = q->execlist;
404 struct xe_device *xe = gt_to_xe(q->gt);
405 unsigned long flags;
406
407 xe_assert(xe, !xe_device_uc_enabled(xe));
408
409 spin_lock_irqsave(&exl->port->lock, flags);
410 if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
411 list_del(entry: &exl->active_link);
412 spin_unlock_irqrestore(lock: &exl->port->lock, flags);
413
414 xe_exec_queue_fini(q);
415}
416
/* .kill hook: not implemented yet (NIY) for the execlist backend; no-op. */
static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
}
421
422static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
423{
424 INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
425 queue_work(wq: system_unbound_wq, work: &q->execlist->destroy_async);
426}
427
428static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
429 enum xe_exec_queue_priority priority)
430{
431 /* NIY */
432 return 0;
433}
434
435static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
436{
437 /* NIY */
438 return 0;
439}
440
441static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
442 u32 preempt_timeout_us)
443{
444 /* NIY */
445 return 0;
446}
447
/*
 * .suspend hook: not implemented yet (NIY) for the execlist backend.
 * Always returns 0.
 */
static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	return 0;
}
453
/*
 * .suspend_wait hook: not implemented yet (NIY) for the execlist backend.
 * Always returns 0.
 */
static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	return 0;
}
460
/* .resume hook: not implemented yet (NIY) for the execlist backend; no-op. */
static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
}
465
466static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
467{
468 /* NIY */
469 return false;
470}
471
472static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
473 .init = execlist_exec_queue_init,
474 .kill = execlist_exec_queue_kill,
475 .fini = execlist_exec_queue_fini,
476 .destroy = execlist_exec_queue_destroy,
477 .set_priority = execlist_exec_queue_set_priority,
478 .set_timeslice = execlist_exec_queue_set_timeslice,
479 .set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
480 .suspend = execlist_exec_queue_suspend,
481 .suspend_wait = execlist_exec_queue_suspend_wait,
482 .resume = execlist_exec_queue_resume,
483 .reset_status = execlist_exec_queue_reset_status,
484};
485
486int xe_execlist_init(struct xe_gt *gt)
487{
488 /* GuC submission enabled, nothing to do */
489 if (xe_device_uc_enabled(gt_to_xe(gt)))
490 return 0;
491
492 gt->exec_queue_ops = &execlist_exec_queue_ops;
493
494 return 0;
495}
496

/* Source: linux/drivers/gpu/drm/xe/xe_execlist.c */