/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/mutex.h>

#include "i915_drv.h"
#include "i915_request.h"
#include "i915_scheduler.h"

static struct kmem_cache *slab_dependencies;
static struct kmem_cache *slab_priorities;

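/*
 * Global lock serialising dependency-graph updates: priority propagation
 * in __i915_schedule() and the add/remove of i915_dependency links.
 */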
static DEFINE_SPINLOCK(schedule_lock);

static const struct i915_request *
node_to_request(const struct i915_sched_node *node)
{
        return container_of(node, const struct i915_request, sched);
}

static inline bool node_started(const struct i915_sched_node *node)
{
        return i915_request_started(node_to_request(node));
}

static inline bool node_signaled(const struct i915_sched_node *node)
{
        return i915_request_completed(node_to_request(node));
}

static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
        return rb_entry(rb, struct i915_priolist, node);
}

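/*
 * Debug-only (CONFIG_DRM_I915_DEBUG_GEM) check that the priolist rbtree
 * is sorted in descending priority order and that the cached leftmost
 * node matches the tree's true first element.
 */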
static void assert_priolists(struct i915_sched_engine * const sched_engine)
{
        struct rb_node *rb;
        long last_prio;

        if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM))
                return;

        GEM_BUG_ON(rb_first_cached(&sched_engine->queue) !=
                   rb_first(&sched_engine->queue.rb_root));

        last_prio = INT_MAX;
        for (rb = rb_first_cached(&sched_engine->queue); rb; rb = rb_next(rb)) {
                const struct i915_priolist *p = to_priolist(rb);

                GEM_BUG_ON(p->priority > last_prio);
                last_prio = p->priority;
        }
}

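/**
 * i915_sched_lookup_priolist - find (or create) the request list for @prio
 * @sched_engine: the engine whose priority queue is searched
 * @prio: the priority level to look up
 *
 * Returns the list of requests queued at @prio, inserting a new
 * i915_priolist into the rbtree if one does not yet exist. Must be
 * called with sched_engine->lock held.
 */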
struct list_head *
i915_sched_lookup_priolist(struct i915_sched_engine *sched_engine, int prio)
{
        struct i915_priolist *p;
        struct rb_node **parent, *rb;
        bool first = true;

        lockdep_assert_held(&sched_engine->lock);
        assert_priolists(sched_engine);

        if (unlikely(sched_engine->no_priolist))
                prio = I915_PRIORITY_NORMAL;

find_priolist:
        /* most positive priority is scheduled first, equal priorities fifo */
        rb = NULL;
        parent = &sched_engine->queue.rb_root.rb_node;
        while (*parent) {
                rb = *parent;
                p = to_priolist(rb);
                if (prio > p->priority) {
                        parent = &rb->rb_left;
                } else if (prio < p->priority) {
                        parent = &rb->rb_right;
                        first = false;
                } else {
                        return &p->requests;
                }
        }

        if (prio == I915_PRIORITY_NORMAL) {
                p = &sched_engine->default_priolist;
        } else {
                p = kmem_cache_alloc(slab_priorities, GFP_ATOMIC);
                /* Convert an allocation failure to a priority bump */
                if (unlikely(!p)) {
                        prio = I915_PRIORITY_NORMAL; /* recurses just once */

                        /* To maintain ordering with all rendering, after an
                         * allocation failure we have to disable all scheduling.
                         * Requests will then be executed in fifo, and schedule
                         * will ensure that dependencies are emitted in fifo.
                         * There will still be some reordering with existing
                         * requests, so if userspace lied about their
                         * dependencies that reordering may be visible.
                         */
                        sched_engine->no_priolist = true;
                        goto find_priolist;
                }
        }

        p->priority = prio;
        INIT_LIST_HEAD(&p->requests);

        rb_link_node(&p->node, rb, parent);
        rb_insert_color_cached(&p->node, &sched_engine->queue, first);

        return &p->requests;
}

void __i915_priolist_free(struct i915_priolist *p)
{
        kmem_cache_free(slab_priorities, p);
}

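/*
 * Scratch cache used while walking the dependency list in __i915_schedule():
 * remembers the last priolist looked up so consecutive moves to the same
 * priority level skip the rbtree search. Reset whenever we hop engines.
 */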
struct sched_cache {
        struct list_head *priolist;
};

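/*
 * Swap the engine lock we hold (@locked) for the lock of the engine that
 * @node's request currently belongs to, looping until rq->engine stops
 * moving beneath us.
 */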
static struct i915_sched_engine *
lock_sched_engine(struct i915_sched_node *node,
                  struct i915_sched_engine *locked,
                  struct sched_cache *cache)
{
        const struct i915_request *rq = node_to_request(node);
        struct i915_sched_engine *sched_engine;

        GEM_BUG_ON(!locked);

        /*
         * Virtual engines complicate acquiring the engine timeline lock,
         * as their rq->engine pointer is not stable until under that
         * engine lock. The simple ploy we use is to take the lock then
         * check that the rq still belongs to the newly locked engine.
         */
        while (locked != (sched_engine = READ_ONCE(rq->engine)->sched_engine)) {
                spin_unlock(&locked->lock);
                memset(cache, 0, sizeof(*cache));
                spin_lock(&sched_engine->lock);
                locked = sched_engine;
        }

        GEM_BUG_ON(locked != sched_engine);
        return locked;
}

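/*
 * Raise the effective priority of @node to at least @attr->priority and
 * propagate the bump to every signaler it transitively depends on, using
 * an iterative depth-first walk to avoid unbounded recursion. Caller must
 * hold schedule_lock.
 */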
static void __i915_schedule(struct i915_sched_node *node,
                            const struct i915_sched_attr *attr)
{
        const int prio = max(attr->priority, node->attr.priority);
        struct i915_sched_engine *sched_engine;
        struct i915_dependency *dep, *p;
        struct i915_dependency stack;
        struct sched_cache cache;
        LIST_HEAD(dfs);

        /* Needed in order to use the temporary link inside i915_dependency */
        lockdep_assert_held(&schedule_lock);
        GEM_BUG_ON(prio == I915_PRIORITY_INVALID);

        if (node_signaled(node))
                return;

        stack.signaler = node;
        list_add(&stack.dfs_link, &dfs);

        /*
         * Recursively bump all dependent priorities to match the new request.
         *
         * A naive approach would be to use recursion:
         * static void update_priorities(struct i915_sched_node *node, prio) {
         *      list_for_each_entry(dep, &node->signalers_list, signal_link)
         *              update_priorities(dep->signal, prio)
         *      queue_request(node);
         * }
         * but that may have unlimited recursion depth and so runs a very
         * real risk of overrunning the kernel stack. Instead, we build
         * a flat list of all dependencies starting with the current request.
         * As we walk the list of dependencies, we add all of its dependencies
         * to the end of the list (this may include an already visited
         * request) and continue to walk onwards onto the new dependencies. The
         * end result is a topological list of requests in reverse order, the
         * last element in the list is the request we must execute first.
         */
        list_for_each_entry(dep, &dfs, dfs_link) {
                struct i915_sched_node *node = dep->signaler;

                /* If we are already flying, we know we have no signalers */
                if (node_started(node))
                        continue;

                /*
                 * Within an engine, there can be no cycle, but we may
                 * refer to the same dependency chain multiple times
                 * (redundant dependencies are not eliminated) and across
                 * engines.
                 */
                list_for_each_entry(p, &node->signalers_list, signal_link) {
                        GEM_BUG_ON(p == dep); /* no cycles! */

                        if (node_signaled(p->signaler))
                                continue;

                        if (prio > READ_ONCE(p->signaler->attr.priority))
                                list_move_tail(&p->dfs_link, &dfs);
                }
        }

        /*
         * If we didn't need to bump any existing priorities, and we haven't
         * yet submitted this request (i.e. there is no potential race with
         * execlists_submit_request()), we can set our own priority and skip
         * acquiring the engine locks.
         */
        if (node->attr.priority == I915_PRIORITY_INVALID) {
                GEM_BUG_ON(!list_empty(&node->link));
                node->attr = *attr;

                if (stack.dfs_link.next == stack.dfs_link.prev)
                        return;

                __list_del_entry(&stack.dfs_link);
        }

        memset(&cache, 0, sizeof(cache));
        sched_engine = node_to_request(node)->engine->sched_engine;
        spin_lock(&sched_engine->lock);

        /* Fifo and depth-first replacement ensure our deps execute before us */
        sched_engine = lock_sched_engine(node, sched_engine, &cache);
        list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
                struct i915_request *from = container_of(dep->signaler,
                                                         struct i915_request,
                                                         sched);
                INIT_LIST_HEAD(&dep->dfs_link);

                node = dep->signaler;
                sched_engine = lock_sched_engine(node, sched_engine, &cache);
                lockdep_assert_held(&sched_engine->lock);

                /* Recheck after acquiring the engine->timeline.lock */
                if (prio <= node->attr.priority || node_signaled(node))
                        continue;

                GEM_BUG_ON(node_to_request(node)->engine->sched_engine !=
                           sched_engine);

                /* Must be called before changing the node's priority */
                if (sched_engine->bump_inflight_request_prio)
                        sched_engine->bump_inflight_request_prio(from, prio);

                WRITE_ONCE(node->attr.priority, prio);

                /*
                 * Once the request is ready, it will be placed into the
                 * priority lists and then onto the HW runlist. Before the
                 * request is ready, it does not contribute to our preemption
                 * decisions and we can safely ignore it; it, and any
                 * preemption required, will be dealt with upon submission.
                 * See engine->submit_request()
                 */
                if (list_empty(&node->link))
                        continue;

                if (i915_request_in_priority_queue(node_to_request(node))) {
                        if (!cache.priolist)
                                cache.priolist =
                                        i915_sched_lookup_priolist(sched_engine,
                                                                   prio);
                        list_move_tail(&node->link, cache.priolist);
                }

                /* Defer (tasklet) submission until after all of our updates. */
                if (sched_engine->kick_backend)
                        sched_engine->kick_backend(node_to_request(node), prio);
        }

        spin_unlock(&sched_engine->lock);
}

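/**
 * i915_schedule - apply new scheduling attributes to a request
 * @rq: the request to (re)schedule
 * @attr: the requested scheduling attributes (e.g. priority)
 *
 * Bumps the priority of @rq and of all the requests it transitively
 * depends upon, under the global schedule_lock.
 */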
void i915_schedule(struct i915_request *rq, const struct i915_sched_attr *attr)
{
        spin_lock_irq(&schedule_lock);
        __i915_schedule(&rq->sched, attr);
        spin_unlock_irq(&schedule_lock);
}

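/* One-time initialisation of the scheduling node embedded in a request. */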
void i915_sched_node_init(struct i915_sched_node *node)
{
        INIT_LIST_HEAD(&node->signalers_list);
        INIT_LIST_HEAD(&node->waiters_list);
        INIT_LIST_HEAD(&node->link);

        i915_sched_node_reinit(node);
}

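/*
 * Reset a node for reuse; the dependency lists must already have been
 * emptied (see i915_sched_node_fini()).
 */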
void i915_sched_node_reinit(struct i915_sched_node *node)
{
        node->attr.priority = I915_PRIORITY_INVALID;
        node->semaphores = 0;
        node->flags = 0;

        GEM_BUG_ON(!list_empty(&node->signalers_list));
        GEM_BUG_ON(!list_empty(&node->waiters_list));
        GEM_BUG_ON(!list_empty(&node->link));
}

static struct i915_dependency *
i915_dependency_alloc(void)
{
        return kmem_cache_alloc(slab_dependencies, GFP_KERNEL);
}

static void
i915_dependency_free(struct i915_dependency *dep)
{
        kmem_cache_free(slab_dependencies, dep);
}

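/*
 * Link @node as a waiter on @signal using the caller-provided @dep.
 * Returns true if the dependency was recorded, or false if @signal has
 * already been signaled and no link was added (the caller may then free
 * @dep).
 */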
bool __i915_sched_node_add_dependency(struct i915_sched_node *node,
                                      struct i915_sched_node *signal,
                                      struct i915_dependency *dep,
                                      unsigned long flags)
{
        bool ret = false;

        spin_lock_irq(&schedule_lock);

        if (!node_signaled(signal)) {
                INIT_LIST_HEAD(&dep->dfs_link);
                dep->signaler = signal;
                dep->waiter = node;
                dep->flags = flags;

                /* All set, now publish. Beware the lockless walkers. */
                list_add_rcu(&dep->signal_link, &node->signalers_list);
                list_add_rcu(&dep->wait_link, &signal->waiters_list);

                /* Propagate the chains */
                node->flags |= signal->flags;
                ret = true;
        }

        spin_unlock_irq(&schedule_lock);

        return ret;
}

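/*
 * Allocate a dependency tracker and link @node to @signal; the
 * I915_DEPENDENCY_ALLOC flag ensures it is returned to the slab when the
 * nodes are torn down. Returns 0 on success (including when @signal has
 * already completed and no link is needed) or -ENOMEM.
 */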
int i915_sched_node_add_dependency(struct i915_sched_node *node,
                                   struct i915_sched_node *signal,
                                   unsigned long flags)
{
        struct i915_dependency *dep;

        dep = i915_dependency_alloc();
        if (!dep)
                return -ENOMEM;

        if (!__i915_sched_node_add_dependency(node, signal, dep,
                                              flags | I915_DEPENDENCY_ALLOC))
                i915_dependency_free(dep);

        return 0;
}

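/*
 * Sever all dependency links in both directions and release any
 * slab-allocated i915_dependency trackers, leaving @node ready for
 * i915_sched_node_reinit().
 */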
void i915_sched_node_fini(struct i915_sched_node *node)
{
        struct i915_dependency *dep, *tmp;

        spin_lock_irq(&schedule_lock);

        /*
         * Everyone we depended upon (the fences we wait to be signaled)
         * should retire before us and remove themselves from our list.
         * However, retirement is run independently on each timeline and
         * so we may be called out-of-order.
         */
        list_for_each_entry_safe(dep, tmp, &node->signalers_list, signal_link) {
                GEM_BUG_ON(!list_empty(&dep->dfs_link));

                list_del_rcu(&dep->wait_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        i915_dependency_free(dep);
        }
        INIT_LIST_HEAD(&node->signalers_list);

        /* Remove ourselves from everyone who depends upon us */
        list_for_each_entry_safe(dep, tmp, &node->waiters_list, wait_link) {
                GEM_BUG_ON(dep->signaler != node);
                GEM_BUG_ON(!list_empty(&dep->dfs_link));

                list_del_rcu(&dep->signal_link);
                if (dep->flags & I915_DEPENDENCY_ALLOC)
                        i915_dependency_free(dep);
        }
        INIT_LIST_HEAD(&node->waiters_list);

        spin_unlock_irq(&schedule_lock);
}

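/*
 * Print @rq via i915_request_show(), followed by each of its still
 * outstanding signalers on other timelines, indented beneath it.
 */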
void i915_request_show_with_schedule(struct drm_printer *m,
                                     const struct i915_request *rq,
                                     const char *prefix,
                                     int indent)
{
        struct i915_dependency *dep;

        i915_request_show(m, rq, prefix, indent);
        if (i915_request_completed(rq))
                return;

        rcu_read_lock();
        for_each_signaler(dep, rq) {
                const struct i915_request *signaler =
                        node_to_request(dep->signaler);

                /* Dependencies along the same timeline are expected. */
                if (signaler->timeline == rq->timeline)
                        continue;

                if (__i915_request_is_complete(signaler))
                        continue;

                i915_request_show(m, signaler, prefix, indent + 2);
        }
        rcu_read_unlock();
}

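/* Default ->destroy() callback: flush the tasklet before freeing. */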
static void default_destroy(struct kref *kref)
{
        struct i915_sched_engine *sched_engine =
                container_of(kref, typeof(*sched_engine), ref);

        tasklet_kill(&sched_engine->tasklet); /* flush the callback */
        kfree(sched_engine);
}

static bool default_disabled(struct i915_sched_engine *sched_engine)
{
        return false;
}

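/**
 * i915_sched_engine_create - allocate and initialise a scheduling engine
 * @subclass: lockdep subclass for the engine lock
 *
 * Returns a new i915_sched_engine with default callbacks and an empty
 * priority queue, or NULL on allocation failure.
 */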
struct i915_sched_engine *
i915_sched_engine_create(unsigned int subclass)
{
        struct i915_sched_engine *sched_engine;

        sched_engine = kzalloc(sizeof(*sched_engine), GFP_KERNEL);
        if (!sched_engine)
                return NULL;

        kref_init(&sched_engine->ref);

        sched_engine->queue = RB_ROOT_CACHED;
        sched_engine->queue_priority_hint = INT_MIN;
        sched_engine->destroy = default_destroy;
        sched_engine->disabled = default_disabled;

        INIT_LIST_HEAD(&sched_engine->requests);
        INIT_LIST_HEAD(&sched_engine->hold);

        spin_lock_init(&sched_engine->lock);
        lockdep_set_subclass(&sched_engine->lock, subclass);

        /*
         * Due to an interesting quirk in lockdep's internal debug tracking,
         * after setting a subclass we must ensure the lock is used. Otherwise,
         * nr_unused_locks is incremented once too often.
         */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
        local_irq_disable();
        lock_map_acquire(&sched_engine->lock.dep_map);
        lock_map_release(&sched_engine->lock.dep_map);
        local_irq_enable();
#endif

        return sched_engine;
}

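/* Tear down the scheduler slab caches; pairs with i915_scheduler_module_init(). */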
void i915_scheduler_module_exit(void)
{
        kmem_cache_destroy(slab_dependencies);
        kmem_cache_destroy(slab_priorities);
}

int __init i915_scheduler_module_init(void)
{
        slab_dependencies = KMEM_CACHE(i915_dependency,
                                       SLAB_HWCACHE_ALIGN |
                                       SLAB_TYPESAFE_BY_RCU);
        if (!slab_dependencies)
                return -ENOMEM;

        slab_priorities = KMEM_CACHE(i915_priolist, 0);
        if (!slab_priorities)
                goto err_priorities;

        return 0;

err_priorities:
        kmem_cache_destroy(slab_dependencies);
        return -ENOMEM;
}