// SPDX-License-Identifier: MIT
/*
 * Copyright © 2021 Intel Corporation
 */

#include "xe_execlist.h"

#include <drm/drm_managed.h>

#include "instructions/xe_mi_commands.h"
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
#include "regs/xe_lrc_layout.h"
#include "xe_assert.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_irq.h"
#include "xe_lrc.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_mocs.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"

#define XE_EXECLIST_HANG_LIMIT 1

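/*
 * Each context submitted through execlists carries a software context ID in
 * the upper bits of its LRC descriptor: an 11-bit field at bit 37 on older
 * platforms, widened to 16 bits at bit 39 on graphics version 12.50 (Xe_HP)
 * and later.
 */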
#define SW_CTX_ID_SHIFT 37
#define SW_CTX_ID_WIDTH 11
#define XEHP_SW_CTX_ID_SHIFT 39
#define XEHP_SW_CTX_ID_WIDTH 16

#define SW_CTX_ID \
	GENMASK_ULL(SW_CTX_ID_WIDTH + SW_CTX_ID_SHIFT - 1, \
		    SW_CTX_ID_SHIFT)

#define XEHP_SW_CTX_ID \
	GENMASK_ULL(XEHP_SW_CTX_ID_WIDTH + XEHP_SW_CTX_ID_SHIFT - 1, \
		    XEHP_SW_CTX_ID_SHIFT)

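/*
 * Point the engine's execlist submit queue at a single context and load it:
 * tag the LRC descriptor with a software context ID, make sure the context
 * image is visible to the HW, then write the descriptor into the SQ contents
 * registers and trigger a load via EXECLIST_CONTROL.
 */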
static void __start_lrc(struct xe_hw_engine *hwe, struct xe_lrc *lrc,
			u32 ctx_id)
{
	struct xe_gt *gt = hwe->gt;
	struct xe_mmio *mmio = &gt->mmio;
	struct xe_device *xe = gt_to_xe(gt);
	u64 lrc_desc;
	u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);

	lrc_desc = xe_lrc_descriptor(lrc);

	if (GRAPHICS_VERx100(xe) >= 1250) {
		xe_gt_assert(hwe->gt, FIELD_FIT(XEHP_SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(XEHP_SW_CTX_ID, ctx_id);
	} else {
		xe_gt_assert(hwe->gt, FIELD_FIT(SW_CTX_ID, ctx_id));
		lrc_desc |= FIELD_PREP(SW_CTX_ID, ctx_id);
	}

	if (hwe->class == XE_ENGINE_CLASS_COMPUTE)
		xe_mmio_write32(mmio, RCU_MODE,
				_MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));

	xe_lrc_write_ctx_reg(lrc, CTX_RING_TAIL, lrc->ring.tail);
	lrc->ring.old_tail = lrc->ring.tail;

	/*
	 * Make sure the context image is complete before we submit it to HW.
	 *
	 * Ostensibly, writes (including the WCB) should be flushed prior to
	 * an uncached write such as our mmio register access, but the
	 * empirical evidence (esp. on Braswell) suggests that the WC write
	 * into memory may not be visible to the HW prior to the completion
	 * of the UC register write and that we may begin execution from the
	 * context before its image is complete leading to invalid PD chasing.
	 */
	wmb();

	xe_mmio_write32(mmio, RING_HWS_PGA(hwe->mmio_base),
			xe_bo_ggtt_addr(hwe->hwsp));
	xe_mmio_read32(mmio, RING_HWS_PGA(hwe->mmio_base));

	if (xe_device_has_msix(gt_to_xe(hwe->gt)))
		ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
	xe_mmio_write32(mmio, RING_MODE(hwe->mmio_base), ring_mode);

	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_LO(hwe->mmio_base),
			lower_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_SQ_CONTENTS_HI(hwe->mmio_base),
			upper_32_bits(lrc_desc));
	xe_mmio_write32(mmio, RING_EXECLIST_CONTROL(hwe->mmio_base),
			EL_CTRL_LOAD);
}

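/*
 * Submit @exl on the port. A fresh software context ID is allocated whenever
 * a different queue takes over (or the queue has never run), so the HW sees
 * a context switch rather than a resubmission of the context it already has
 * loaded.
 */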
static void __xe_execlist_port_start(struct xe_execlist_port *port,
				     struct xe_execlist_exec_queue *exl)
{
	struct xe_device *xe = gt_to_xe(port->hwe->gt);
	int max_ctx = FIELD_MAX(SW_CTX_ID);

	if (GRAPHICS_VERx100(xe) >= 1250)
		max_ctx = FIELD_MAX(XEHP_SW_CTX_ID);

	xe_execlist_port_assert_held(port);

	if (port->running_exl != exl || !exl->has_run) {
		port->last_ctx_id++;

		/* 0 is reserved for the kernel context */
		if (port->last_ctx_id > max_ctx)
			port->last_ctx_id = 1;
	}

	__start_lrc(port->hwe, exl->q->lrc[0], port->last_ctx_id);
	port->running_exl = exl;
	exl->has_run = true;
}

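/*
 * Park the port: submit the port's own LRC, containing only a couple of
 * MI_NOOPs, with context ID 0 so the engine has something harmless loaded
 * when no queue has runnable work.
 */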
static void __xe_execlist_port_idle(struct xe_execlist_port *port)
{
	u32 noop[2] = { MI_NOOP, MI_NOOP };

	xe_execlist_port_assert_held(port);

	if (!port->running_exl)
		return;

	xe_lrc_write_ring(port->lrc, noop, sizeof(noop));
	__start_lrc(port->hwe, port->lrc, 0);
	port->running_exl = NULL;
}

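/* A queue has no new work if its ring tail hasn't advanced since the last
 * submission to HW.
 */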
static bool xe_execlist_is_idle(struct xe_execlist_exec_queue *exl)
{
	struct xe_lrc *lrc = exl->q->lrc[0];

	return lrc->ring.tail == lrc->ring.old_tail;
}

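/*
 * Pick the next queue to run: walk the per-priority active lists from
 * highest to lowest, dropping queues with no new work, and start the first
 * one that does; if nothing is runnable, idle the port.
 */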
static void __xe_execlist_port_start_next_active(struct xe_execlist_port *port)
{
	struct xe_execlist_exec_queue *exl = NULL;
	int i;

	xe_execlist_port_assert_held(port);

	for (i = ARRAY_SIZE(port->active) - 1; i >= 0; i--) {
		while (!list_empty(&port->active[i])) {
			exl = list_first_entry(&port->active[i],
					       struct xe_execlist_exec_queue,
					       active_link);
			list_del(&exl->active_link);

			if (xe_execlist_is_idle(exl)) {
				exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
				continue;
			}

			list_add_tail(&exl->active_link, &port->active[i]);
			__xe_execlist_port_start(port, exl);
			return;
		}
	}

	__xe_execlist_port_idle(port);
}

static u64 read_execlist_status(struct xe_hw_engine *hwe)
{
	struct xe_gt *gt = hwe->gt;
	u32 hi, lo;

	lo = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_LO(hwe->mmio_base));
	hi = xe_mmio_read32(&gt->mmio, RING_EXECLIST_STATUS_HI(hwe->mmio_base));

	return lo | (u64)hi << 32;
}

static void xe_execlist_port_irq_handler_locked(struct xe_execlist_port *port)
{
	u64 status;

	xe_execlist_port_assert_held(port);

	status = read_execlist_status(port->hwe);
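	/*
	 * Status bit 7 is assumed here to mean the engine is still busy with
	 * the current execlist submission; if it is set, leave the port alone
	 * and wait for the next CS interrupt (or the fallback timer).
	 */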
	if (status & BIT(7))
		return;

	__xe_execlist_port_start_next_active(port);
}

static void xe_execlist_port_irq_handler(struct xe_hw_engine *hwe,
					 u16 intr_vec)
{
	struct xe_execlist_port *port = hwe->exl_port;

	spin_lock(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock(&port->lock);
}

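/*
 * Restart submission on the port unless the queue currently on the HW
 * already runs at a priority greater than or equal to @priority.
 */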
static void xe_execlist_port_wake_locked(struct xe_execlist_port *port,
					 enum xe_exec_queue_priority priority)
{
	xe_execlist_port_assert_held(port);

	if (port->running_exl && port->running_exl->active_priority >= priority)
		return;

	__xe_execlist_port_start_next_active(port);
}

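/*
 * Put the queue on its port's active list for its current scheduling
 * priority, moving it between lists if the priority changed since it was
 * last made active, then wake the port.
 */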
static void xe_execlist_make_active(struct xe_execlist_exec_queue *exl)
{
	struct xe_execlist_port *port = exl->port;
	enum xe_exec_queue_priority priority = exl->q->sched_props.priority;

	XE_WARN_ON(priority == XE_EXEC_QUEUE_PRIORITY_UNSET);
	XE_WARN_ON(priority < 0);
	XE_WARN_ON(priority >= ARRAY_SIZE(exl->port->active));

	spin_lock_irq(&port->lock);

	if (exl->active_priority != priority &&
	    exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET) {
		/* Priority changed, move it to the right list */
		list_del(&exl->active_link);
		exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	}

	if (exl->active_priority == XE_EXEC_QUEUE_PRIORITY_UNSET) {
		exl->active_priority = priority;
		list_add_tail(&exl->active_link, &port->active[priority]);
	}

	xe_execlist_port_wake_locked(exl->port, priority);

	spin_unlock_irq(&port->lock);
}

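/*
 * Fallback for missed interrupts: poll the execlist state once a second and
 * restart submission if needed (see the TODO in xe_execlist_port_create()).
 */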
static void xe_execlist_port_irq_fail_timer(struct timer_list *timer)
{
	struct xe_execlist_port *port =
		container_of(timer, struct xe_execlist_port, irq_fail);

	spin_lock_irq(&port->lock);
	xe_execlist_port_irq_handler_locked(port);
	spin_unlock_irq(&port->lock);

	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);
}

struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
						 struct xe_hw_engine *hwe)
{
	struct drm_device *drm = &xe->drm;
	struct xe_execlist_port *port;
	int i, err;

	port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
	if (!port) {
		err = -ENOMEM;
		goto err;
	}

	port->hwe = hwe;

	port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, XE_IRQ_DEFAULT_MSIX, 0);
	if (IS_ERR(port->lrc)) {
		err = PTR_ERR(port->lrc);
		goto err;
	}

	spin_lock_init(&port->lock);
	for (i = 0; i < ARRAY_SIZE(port->active); i++)
		INIT_LIST_HEAD(&port->active[i]);

	port->last_ctx_id = 1;
	port->running_exl = NULL;

	hwe->irq_handler = xe_execlist_port_irq_handler;

	/* TODO: Fix the interrupt code so it doesn't race like mad */
	timer_setup(&port->irq_fail, xe_execlist_port_irq_fail_timer, 0);
	port->irq_fail.expires = jiffies + msecs_to_jiffies(1000);
	add_timer(&port->irq_fail);

	return port;

err:
	return ERR_PTR(err);
}

void xe_execlist_port_destroy(struct xe_execlist_port *port)
{
	timer_delete(&port->irq_fail);

	/* Prevent an interrupt while we're destroying */
	spin_lock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);
	port->hwe->irq_handler = NULL;
	spin_unlock_irq(&gt_to_xe(port->hwe->gt)->irq.lock);

	xe_lrc_put(port->lrc);
}

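/*
 * drm_sched backend: emit the job's commands into its queue's ring, mark the
 * queue active so the port picks it up, and hand the job's fence back to the
 * scheduler.
 */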
static struct dma_fence *
execlist_run_job(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);
	struct xe_exec_queue *q = job->q;
	struct xe_execlist_exec_queue *exl = job->q->execlist;

	q->ring_ops->emit_job(job);
	xe_execlist_make_active(exl);

	return job->fence;
}

static void execlist_job_free(struct drm_sched_job *drm_job)
{
	struct xe_sched_job *job = to_xe_sched_job(drm_job);

	xe_exec_queue_update_run_ticks(job->q);
	xe_sched_job_put(job);
}

static const struct drm_sched_backend_ops drm_sched_ops = {
	.run_job = execlist_run_job,
	.free_job = execlist_job_free,
};

static int execlist_exec_queue_init(struct xe_exec_queue *q)
{
	struct drm_gpu_scheduler *sched;
	const struct drm_sched_init_args args = {
		.ops = &drm_sched_ops,
		.num_rqs = 1,
		.credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
		.hang_limit = XE_SCHED_HANG_LIMIT,
		.timeout = XE_SCHED_JOB_TIMEOUT,
		.name = q->hwe->name,
		.dev = gt_to_xe(q->gt)->drm.dev,
	};
	struct xe_execlist_exec_queue *exl;
	struct xe_device *xe = gt_to_xe(q->gt);
	int err;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	drm_info(&xe->drm, "Enabling execlist submission (GuC submission disabled)\n");

	exl = kzalloc(sizeof(*exl), GFP_KERNEL);
	if (!exl)
		return -ENOMEM;

	exl->q = q;

	err = drm_sched_init(&exl->sched, &args);
	if (err)
		goto err_free;

	sched = &exl->sched;
	err = drm_sched_entity_init(&exl->entity, 0, &sched, 1, NULL);
	if (err)
		goto err_sched;

	exl->port = q->hwe->exl_port;
	exl->has_run = false;
	exl->active_priority = XE_EXEC_QUEUE_PRIORITY_UNSET;
	q->execlist = exl;
	q->entity = &exl->entity;

	xe_exec_queue_assign_name(q, ffs(q->logical_mask) - 1);

	return 0;

err_sched:
	drm_sched_fini(&exl->sched);
err_free:
	kfree(exl);
	return err;
}

static void execlist_exec_queue_fini(struct xe_exec_queue *q)
{
	struct xe_execlist_exec_queue *exl = q->execlist;

	drm_sched_entity_fini(&exl->entity);
	drm_sched_fini(&exl->sched);

	kfree(exl);
}

static void execlist_exec_queue_destroy_async(struct work_struct *w)
{
	struct xe_execlist_exec_queue *ee =
		container_of(w, struct xe_execlist_exec_queue, destroy_async);
	struct xe_exec_queue *q = ee->q;
	struct xe_execlist_exec_queue *exl = q->execlist;
	struct xe_device *xe = gt_to_xe(q->gt);
	unsigned long flags;

	xe_assert(xe, !xe_device_uc_enabled(xe));

	spin_lock_irqsave(&exl->port->lock, flags);
	if (WARN_ON(exl->active_priority != XE_EXEC_QUEUE_PRIORITY_UNSET))
		list_del(&exl->active_link);
	spin_unlock_irqrestore(&exl->port->lock, flags);

	xe_exec_queue_fini(q);
}

static void execlist_exec_queue_kill(struct xe_exec_queue *q)
{
	/* NIY */
}

static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
{
	INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
	queue_work(system_unbound_wq, &q->execlist->destroy_async);
}

static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
					    enum xe_exec_queue_priority priority)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_timeslice(struct xe_exec_queue *q, u32 timeslice_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_set_preempt_timeout(struct xe_exec_queue *q,
						    u32 preempt_timeout_us)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static int execlist_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
	/* NIY */
	return 0;
}

static void execlist_exec_queue_resume(struct xe_exec_queue *q)
{
	/* NIY */
}

static bool execlist_exec_queue_reset_status(struct xe_exec_queue *q)
{
	/* NIY */
	return false;
}

static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
	.init = execlist_exec_queue_init,
	.kill = execlist_exec_queue_kill,
	.fini = execlist_exec_queue_fini,
	.destroy = execlist_exec_queue_destroy,
	.set_priority = execlist_exec_queue_set_priority,
	.set_timeslice = execlist_exec_queue_set_timeslice,
	.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
	.suspend = execlist_exec_queue_suspend,
	.suspend_wait = execlist_exec_queue_suspend_wait,
	.resume = execlist_exec_queue_resume,
	.reset_status = execlist_exec_queue_reset_status,
};

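/*
 * Install the execlist-based exec queue ops on @gt; execlist submission is
 * only used as a fallback when GuC submission is disabled.
 */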
int xe_execlist_init(struct xe_gt *gt)
{
	/* GuC submission enabled, nothing to do */
	if (xe_device_uc_enabled(gt_to_xe(gt)))
		return 0;

	gt->exec_queue_ops = &execlist_exec_queue_ops;

	return 0;
}