// SPDX-License-Identifier: MIT
/*
 * Copyright © 2015-2021 Intel Corporation
 */

#include <linux/kthread.h>
#include <linux/string_helpers.h>
#include <trace/events/dma_fence.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>

#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_breadcrumbs.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"

static bool irq_enable(struct intel_breadcrumbs *b)
{
	return intel_engine_irq_enable(b->irq_engine);
}

static void irq_disable(struct intel_breadcrumbs *b)
{
	intel_engine_irq_disable(b->irq_engine);
}
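
/*
 * irq_enable()/irq_disable() above are only the default callbacks; they are
 * installed into b->irq_enable/b->irq_disable by intel_breadcrumbs_create()
 * below, so an alternative submission backend can, in principle, substitute
 * its own hooks after creation.
 */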

static void __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref;

	/*
	 * Since we are waiting on a request, the GPU should be busy
	 * and should have its own rpm reference.
	 */
	wakeref = intel_gt_pm_get_if_awake(b->irq_engine->gt);
	if (GEM_WARN_ON(!wakeref))
		return;

	/*
	 * The breadcrumb irq will be disarmed on the interrupt after the
	 * waiters are signaled. This gives us a single interrupt window in
	 * which we can add a new waiter and avoid the cost of re-enabling
	 * the irq.
	 */
	WRITE_ONCE(b->irq_armed, wakeref);

	/* Requests may have completed before we could enable the interrupt. */
	if (!b->irq_enabled++ && b->irq_enable(b))
		irq_work_queue(&b->irq_work);
}

static void intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
{
	if (!b->irq_engine)
		return;

	spin_lock(&b->irq_lock);
	if (!b->irq_armed)
		__intel_breadcrumbs_arm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	intel_wakeref_t wakeref = b->irq_armed;

	GEM_BUG_ON(!b->irq_enabled);
	if (!--b->irq_enabled)
		b->irq_disable(b);

	WRITE_ONCE(b->irq_armed, NULL);
	intel_gt_pm_put_async(b->irq_engine->gt, wakeref);
}

static void intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
{
	spin_lock(&b->irq_lock);
	if (b->irq_armed)
		__intel_breadcrumbs_disarm_irq(b);
	spin_unlock(&b->irq_lock);
}

static void add_signaling_context(struct intel_breadcrumbs *b,
				  struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	spin_lock(&b->signalers_lock);
	list_add_rcu(&ce->signal_link, &b->signalers);
	spin_unlock(&b->signalers_lock);
}

static bool remove_signaling_context(struct intel_breadcrumbs *b,
				     struct intel_context *ce)
{
	lockdep_assert_held(&ce->signal_lock);

	if (!list_empty(&ce->signals))
		return false;

	spin_lock(&b->signalers_lock);
	list_del_rcu(&ce->signal_link);
	spin_unlock(&b->signalers_lock);

	return true;
}

__maybe_unused static bool
check_signal_order(struct intel_context *ce, struct i915_request *rq)
{
	if (rq->context != ce)
		return false;

	if (!list_is_last(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(rq->fence.seqno,
			      list_next_entry(rq, signal_link)->fence.seqno))
		return false;

	if (!list_is_first(&rq->signal_link, &ce->signals) &&
	    i915_seqno_passed(list_prev_entry(rq, signal_link)->fence.seqno,
			      rq->fence.seqno))
		return false;

	return true;
}
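
/*
 * A quick worked example of the ordering invariant checked above, assuming
 * i915_seqno_passed() is the usual wrap-safe comparison,
 * (s32)(seq1 - seq2) >= 0:
 *
 *	ce->signals: 0xfffffffe -> 0xffffffff -> 0x1 -> 0x2
 *
 * Each entry "passes" its predecessor even across the u32 wrap, since e.g.
 * (s32)(0x1 - 0xffffffff) == 2 >= 0. An out-of-order insertion, say 0x2
 * placed before 0x1, would trip one of the two checks above.
 */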

static bool
__dma_fence_signal(struct dma_fence *fence)
{
	return !test_and_set_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags);
}

static void
__dma_fence_signal__timestamp(struct dma_fence *fence, ktime_t timestamp)
{
	fence->timestamp = timestamp;
	set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
	trace_dma_fence_signaled(fence);
}

static void
__dma_fence_signal__notify(struct dma_fence *fence,
			   const struct list_head *list)
{
	struct dma_fence_cb *cur, *tmp;

	lockdep_assert_held(fence->lock);

	list_for_each_entry_safe(cur, tmp, list, node) {
		INIT_LIST_HEAD(&cur->node);
		cur->func(fence, cur);
	}
}

static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl)
{
	if (b->irq_engine)
		intel_engine_add_retire(b->irq_engine, tl);
}

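/*
 * Once requests are plucked off b->signaled_requests with llist_del_all(),
 * or claimed by clearing I915_FENCE_FLAG_SIGNAL, their nodes are exclusively
 * owned by this CPU, so we can chain them onto a plain, non-atomic local
 * list by reusing the embedded llist_node - no cmpxchg loop needed, unlike
 * llist_add().
 */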
static struct llist_node *
slist_add(struct llist_node *node, struct llist_node *head)
{
	node->next = head;
	return node;
}

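/*
 * The irq worker runs in three phases: drain any requests already parked on
 * b->signaled_requests, scan the RCU-protected signalers list for newly
 * completed requests (transferring them to a local list), and finally walk
 * that local list invoking the dma-fence callbacks outside the signal locks.
 * Along the way it lazily arms or disarms the user interrupt depending on
 * whether any signalers remain.
 */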
static void signal_irq_work(struct irq_work *work)
{
	struct intel_breadcrumbs *b = container_of(work, typeof(*b), irq_work);
	const ktime_t timestamp = ktime_get();
	struct llist_node *signal, *sn;
	struct intel_context *ce;

	signal = NULL;
	if (unlikely(!llist_empty(&b->signaled_requests)))
		signal = llist_del_all(&b->signaled_requests);

	/*
	 * Keep the irq armed until the interrupt after all listeners are gone.
	 *
	 * Enabling/disabling the interrupt is rather costly, roughly a couple
	 * of hundred microseconds. If we are proactive and enable/disable
	 * the interrupt around every request that wants a breadcrumb, we
	 * quickly drown in the extra orders of magnitude of latency imposed
	 * on request submission.
	 *
	 * So we try to be lazy, and keep the interrupts enabled until no
	 * more listeners appear within a breadcrumb interrupt interval (that
	 * is until a request completes that no one cares about). The
	 * observation is that listeners come in batches, and will often
	 * listen to a bunch of requests in succession. Though note on icl+,
	 * interrupts are always enabled due to concerns with rc6 being
	 * dysfunctional with per-engine interrupt masking.
	 *
	 * We also try to avoid raising too many interrupts, as they may
	 * be generated by userspace batches and it is unfortunately rather
	 * too easy to drown the CPU under a flood of GPU interrupts. Thus
	 * whenever no one appears to be listening, we turn off the interrupts.
	 * Fewer interrupts should conserve power -- at the very least, fewer
	 * interrupts draw less ire from other users of the system and tools
	 * like powertop.
	 */
	if (!signal && READ_ONCE(b->irq_armed) && list_empty(&b->signalers))
		intel_breadcrumbs_disarm_irq(b);

	rcu_read_lock();
	atomic_inc(&b->signaler_active);
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		struct i915_request *rq;

		list_for_each_entry_rcu(rq, &ce->signals, signal_link) {
			bool release;

			if (!__i915_request_is_complete(rq))
				break;

			if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
						&rq->fence.flags))
				break;

			/*
			 * Queue for execution after dropping the signaling
			 * spinlock as the callback chain may end up adding
			 * more signalers to the same context or engine.
			 */
			spin_lock(&ce->signal_lock);
			list_del_rcu(&rq->signal_link);
			release = remove_signaling_context(b, ce);
			spin_unlock(&ce->signal_lock);
			if (release) {
				if (intel_timeline_is_last(ce->timeline, rq))
					add_retire(b, ce->timeline);
				intel_context_put(ce);
			}

			if (__dma_fence_signal(&rq->fence))
				/* We own signal_node now, xfer to local list */
				signal = slist_add(&rq->signal_node, signal);
			else
				i915_request_put(rq);
		}
	}
	atomic_dec(&b->signaler_active);
	rcu_read_unlock();

	llist_for_each_safe(signal, sn, signal) {
		struct i915_request *rq =
			llist_entry(signal, typeof(*rq), signal_node);
		struct list_head cb_list;

		if (rq->engine->sched_engine->retire_inflight_request_prio)
			rq->engine->sched_engine->retire_inflight_request_prio(rq);

		spin_lock(&rq->lock);
		list_replace(&rq->fence.cb_list, &cb_list);
		__dma_fence_signal__timestamp(&rq->fence, timestamp);
		__dma_fence_signal__notify(&rq->fence, &cb_list);
		spin_unlock(&rq->lock);

		i915_request_put(rq);
	}

	/* Lazy irq enabling after HW submission */
	if (!READ_ONCE(b->irq_armed) && !list_empty(&b->signalers))
		intel_breadcrumbs_arm_irq(b);

	/* And confirm that we still want irqs enabled before we yield */
	if (READ_ONCE(b->irq_armed) && !atomic_read(&b->active))
		intel_breadcrumbs_disarm_irq(b);
}

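/**
 * intel_breadcrumbs_create - allocate a fresh, refcounted breadcrumbs tracker
 * @irq_engine: the engine whose user interrupt drives signaling, or NULL if
 *	the caller will kick the irq worker itself (e.g. a virtual engine)
 *
 * Returns the new tracker with a single reference held, or NULL on
 * allocation failure. A minimal usage sketch, assuming the usual
 * intel_breadcrumbs_put() wrapper around kref_put() is available:
 *
 *	engine->breadcrumbs = intel_breadcrumbs_create(engine);
 *	...
 *	intel_breadcrumbs_put(engine->breadcrumbs);
 */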
struct intel_breadcrumbs *
intel_breadcrumbs_create(struct intel_engine_cs *irq_engine)
{
	struct intel_breadcrumbs *b;

	b = kzalloc(sizeof(*b), GFP_KERNEL);
	if (!b)
		return NULL;

	kref_init(&b->ref);

	spin_lock_init(&b->signalers_lock);
	INIT_LIST_HEAD(&b->signalers);
	init_llist_head(&b->signaled_requests);

	spin_lock_init(&b->irq_lock);
	init_irq_work(&b->irq_work, signal_irq_work);

	b->irq_engine = irq_engine;
	b->irq_enable = irq_enable;
	b->irq_disable = irq_disable;

	return b;
}

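/**
 * intel_breadcrumbs_reset - reapply the irq state after an engine reset
 * @b: the breadcrumbs tracker
 *
 * A reset discards the hardware interrupt state, so replay our software
 * bookkeeping (b->irq_enabled) into the hardware before requests are
 * resubmitted.
 */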
void intel_breadcrumbs_reset(struct intel_breadcrumbs *b)
{
	unsigned long flags;

	if (!b->irq_engine)
		return;

	spin_lock_irqsave(&b->irq_lock, flags);

	if (b->irq_enabled)
		b->irq_enable(b);
	else
		b->irq_disable(b);

	spin_unlock_irqrestore(&b->irq_lock, flags);
}

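/**
 * __intel_breadcrumbs_park - flush the signalers as the engine is parked
 * @b: the breadcrumbs tracker
 *
 * We cannot leave the irq armed across parking (the armed irq holds a GT
 * wakeref, see __intel_breadcrumbs_arm_irq()), so queue the worker one
 * final time to signal any stragglers and disarm the irq.
 */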
void __intel_breadcrumbs_park(struct intel_breadcrumbs *b)
{
	if (!READ_ONCE(b->irq_armed))
		return;

	/* Kick the work once more to drain the signalers, and disarm the irq */
	irq_work_queue(&b->irq_work);
}

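/**
 * intel_breadcrumbs_free - kref release callback for the breadcrumbs tracker
 * @kref: the embedded reference count
 *
 * Called once the last reference is dropped, typically via
 * kref_put(&b->ref, intel_breadcrumbs_free). By this point all signalers
 * must have been removed and the irq disarmed.
 */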
void intel_breadcrumbs_free(struct kref *kref)
{
	struct intel_breadcrumbs *b = container_of(kref, typeof(*b), ref);

	irq_work_sync(&b->irq_work);
	GEM_BUG_ON(!list_empty(&b->signalers));
	GEM_BUG_ON(b->irq_armed);

	kfree(b);
}

static void irq_signal_request(struct i915_request *rq,
			       struct intel_breadcrumbs *b)
{
	if (!__dma_fence_signal(&rq->fence))
		return;

	i915_request_get(rq);
	if (llist_add(&rq->signal_node, &b->signaled_requests))
		irq_work_queue(&b->irq_work);
}

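/* Caller must hold ce->signal_lock; see i915_request_enable_breadcrumb(). */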
static void insert_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	struct list_head *pos;

	if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags))
		return;

	/*
	 * If the request is already completed, we can transfer it
	 * straight onto a signaled list, and queue the irq worker for
	 * its signal completion.
	 */
	if (__i915_request_is_complete(rq)) {
		irq_signal_request(rq, b);
		return;
	}

	if (list_empty(&ce->signals)) {
		intel_context_get(ce);
		add_signaling_context(b, ce);
		pos = &ce->signals;
	} else {
		/*
		 * We keep the seqno in retirement order, so we can break
		 * inside intel_engine_signal_breadcrumbs as soon as we've
		 * passed the last completed request (or seen a request that
		 * hasn't even started). We could walk the timeline->requests,
		 * but keeping a separate signalers_list has the advantage of
		 * hopefully being much smaller than the full list and so
		 * provides faster iteration and detection when there are no
		 * more interrupts required for this context.
		 *
		 * We typically expect to add new signalers in order, so we
		 * start looking for our insertion point from the tail of
		 * the list.
		 */
		list_for_each_prev(pos, &ce->signals) {
			struct i915_request *it =
				list_entry(pos, typeof(*it), signal_link);

			if (i915_seqno_passed(rq->fence.seqno, it->fence.seqno))
				break;
		}
	}

	i915_request_get(rq);
	list_add_rcu(&rq->signal_link, pos);
	GEM_BUG_ON(!check_signal_order(ce, rq));
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags));
	set_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags);

	/*
	 * Defer enabling the interrupt to after HW submission and recheck
	 * the request as it may have completed and raised the interrupt as
	 * we were attaching it into the lists.
	 */
	if (!READ_ONCE(b->irq_armed) || __i915_request_is_complete(rq))
		irq_work_queue(&b->irq_work);
}

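/**
 * i915_request_enable_breadcrumb - request a completion interrupt for @rq
 * @rq: the request to track
 *
 * Attach @rq to its context's signal list so that the next breadcrumb
 * interrupt signals its fence. Safe to call on an already signaled or
 * not-yet-submitted request; both cases return early without arming the
 * irq.
 *
 * Returns: true (the breadcrumb was, or will later be, attached).
 */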
bool i915_request_enable_breadcrumb(struct i915_request *rq)
{
	struct intel_context *ce = rq->context;

	/* Serialises with i915_request_retire() using rq->lock */
	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags))
		return true;

	/*
	 * Peek at i915_request_submit()/i915_request_unsubmit() status.
	 *
	 * If the request is not yet active (and not signaled), we will
	 * attach the breadcrumb later.
	 */
	if (!test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		return true;

	spin_lock(&ce->signal_lock);
	if (test_bit(I915_FENCE_FLAG_ACTIVE, &rq->fence.flags))
		insert_breadcrumb(rq);
	spin_unlock(&ce->signal_lock);

	return true;
}

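/**
 * i915_request_cancel_breadcrumb - stop tracking @rq for signaling
 * @rq: the request being retired or unsubmitted
 *
 * Remove @rq from its context's signal list, dropping the reference taken
 * by insert_breadcrumb(). If the request managed to complete in the
 * meantime, it is still flushed through the irq worker so that its fence
 * callbacks fire.
 */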
void i915_request_cancel_breadcrumb(struct i915_request *rq)
{
	struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs;
	struct intel_context *ce = rq->context;
	bool release;

	spin_lock(&ce->signal_lock);
	if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) {
		spin_unlock(&ce->signal_lock);
		return;
	}

	list_del_rcu(&rq->signal_link);
	release = remove_signaling_context(b, ce);
	spin_unlock(&ce->signal_lock);
	if (release)
		intel_context_put(ce);

	if (__i915_request_is_complete(rq))
		irq_signal_request(rq, b);

	i915_request_put(rq);
}

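/**
 * intel_context_remove_breadcrumbs - purge a context from the signal lists
 * @ce: the context being torn down
 * @b: the breadcrumbs tracker it was registered with
 *
 * Flush every completed request still queued for signaling on @ce and
 * unregister the context, then wait for any concurrent signal_irq_work()
 * to finish walking the RCU lists before the caller releases @ce.
 */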
void intel_context_remove_breadcrumbs(struct intel_context *ce,
				      struct intel_breadcrumbs *b)
{
	struct i915_request *rq, *rn;
	bool release = false;
	unsigned long flags;

	spin_lock_irqsave(&ce->signal_lock, flags);

	if (list_empty(&ce->signals))
		goto unlock;

	list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) {
		GEM_BUG_ON(!__i915_request_is_complete(rq));
		if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL,
					&rq->fence.flags))
			continue;

		list_del_rcu(&rq->signal_link);
		irq_signal_request(rq, b);
		i915_request_put(rq);
	}
	release = remove_signaling_context(b, ce);

unlock:
	spin_unlock_irqrestore(&ce->signal_lock, flags);
	if (release)
		intel_context_put(ce);

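	/*
	 * The list_del_rcu() above is not a grace period; a concurrent
	 * signal_irq_work() may still be traversing our signal_link. Spin
	 * until all such walkers (counted in b->signaler_active) have
	 * drained before the context is allowed to disappear.
	 */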
	while (atomic_read(&b->signaler_active))
		cpu_relax();
}

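/*
 * Debug dump of everything still waiting to be signaled: "!" marks a
 * completed request, "*" one that has started but not yet completed.
 */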
static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p)
{
	struct intel_context *ce;
	struct i915_request *rq;

	drm_printf(p, "Signals:\n");

	rcu_read_lock();
	list_for_each_entry_rcu(ce, &b->signalers, signal_link) {
		list_for_each_entry_rcu(rq, &ce->signals, signal_link)
			drm_printf(p, "\t[%llx:%llx%s] @ %dms\n",
				   rq->fence.context, rq->fence.seqno,
				   __i915_request_is_complete(rq) ? "!" :
				   __i915_request_has_started(rq) ? "*" :
				   "",
				   jiffies_to_msecs(jiffies - rq->emitted_jiffies));
	}
	rcu_read_unlock();
}

void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine,
				    struct drm_printer *p)
{
	struct intel_breadcrumbs *b;

	b = engine->breadcrumbs;
	if (!b)
		return;

	drm_printf(p, "IRQ: %s\n", str_enabled_disabled(b->irq_armed));
	if (!list_empty(&b->signalers))
		print_signals(b, p);
}