| 1 | // SPDX-License-Identifier: MIT |
| 2 | /* |
| 3 | * Copyright © 2021 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #include <linux/interval_tree_generic.h> |
| 7 | #include <linux/sched/mm.h> |
| 8 | |
| 9 | #include "i915_sw_fence.h" |
| 10 | #include "i915_vma_resource.h" |
| 11 | #include "i915_drv.h" |
| 12 | #include "intel_memory_region.h" |
| 13 | |
| 14 | #include "gt/intel_gtt.h" |
| 15 | |
| 16 | static struct kmem_cache *slab_vma_resources; |
| 17 | |
| 18 | /** |
| 19 | * DOC: |
| 20 | * We use a per-vm interval tree to keep track of vma_resources |
| 21 | * scheduled for unbind but not yet unbound. The tree is protected by |
| 22 | * the vm mutex, and nodes are removed just after the unbind fence signals. |
| 23 | * The removal takes the vm mutex from a kernel thread which we need to |
| 24 | * keep in mind so that we don't grab the mutex and try to wait for all |
 * pending unbinds to complete, because that will temporarily block many
| 26 | * of the workqueue threads, and people will get angry. |
| 27 | * |
| 28 | * We should consider using a single ordered fence per VM instead but that |
| 29 | * requires ordering the unbinds and might introduce unnecessary waiting |
| 30 | * for unrelated unbinds. Amount of code will probably be roughly the same |
| 31 | * due to the simplicity of using the interval tree interface. |
| 32 | * |
| 33 | * Another drawback of this interval tree is that the complexity of insertion |
| 34 | * and removal of fences increases as O(ln(pending_unbinds)) instead of |
| 35 | * O(1) for a single fence without interval tree. |
| 36 | */ |
/*
 * The tracked interval is the node's address range extended by the guard
 * amount on both sides, so that unbinds overlapping a guard area are
 * also found by range queries.
 */
#define VMA_RES_START(_node) ((_node)->start - (_node)->guard)
#define VMA_RES_LAST(_node) ((_node)->start + (_node)->node_size + (_node)->guard - 1)
INTERVAL_TREE_DEFINE(struct i915_vma_resource, rb,
		     u64, __subtree_last,
		     VMA_RES_START, VMA_RES_LAST, static, vma_res_itree);
| 42 | |
| 43 | /* Callbacks for the unbind dma-fence. */ |
| 44 | |
| 45 | /** |
| 46 | * i915_vma_resource_alloc - Allocate a vma resource |
| 47 | * |
| 48 | * Return: A pointer to a cleared struct i915_vma_resource or |
| 49 | * a -ENOMEM error pointer if allocation fails. |
| 50 | */ |
| 51 | struct i915_vma_resource *i915_vma_resource_alloc(void) |
| 52 | { |
| 53 | struct i915_vma_resource *vma_res = |
| 54 | kmem_cache_zalloc(slab_vma_resources, GFP_KERNEL); |
| 55 | |
| 56 | return vma_res ? vma_res : ERR_PTR(error: -ENOMEM); |
| 57 | } |
| 58 | |
| 59 | /** |
| 60 | * i915_vma_resource_free - Free a vma resource |
| 61 | * @vma_res: The vma resource to free. |
| 62 | */ |
| 63 | void i915_vma_resource_free(struct i915_vma_resource *vma_res) |
| 64 | { |
| 65 | if (vma_res) |
| 66 | kmem_cache_free(s: slab_vma_resources, objp: vma_res); |
| 67 | } |
| 68 | |
/* dma_fence_ops::get_driver_name callback for the unbind fence. */
static const char *get_driver_name(struct dma_fence *fence)
{
	return "vma unbind fence";
}
| 73 | |
/* dma_fence_ops::get_timeline_name callback for the unbind fence. */
static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}
| 78 | |
| 79 | static void unbind_fence_free_rcu(struct rcu_head *head) |
| 80 | { |
| 81 | struct i915_vma_resource *vma_res = |
| 82 | container_of(head, typeof(*vma_res), unbind_fence.rcu); |
| 83 | |
| 84 | i915_vma_resource_free(vma_res); |
| 85 | } |
| 86 | |
| 87 | static void unbind_fence_release(struct dma_fence *fence) |
| 88 | { |
| 89 | struct i915_vma_resource *vma_res = |
| 90 | container_of(fence, typeof(*vma_res), unbind_fence); |
| 91 | |
| 92 | i915_sw_fence_fini(fence: &vma_res->chain); |
| 93 | |
| 94 | call_rcu(head: &fence->rcu, func: unbind_fence_free_rcu); |
| 95 | } |
| 96 | |
/* dma-fence ops for the unbind fence embedded in a vma resource. */
static const struct dma_fence_ops unbind_fence_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
	.release = unbind_fence_release,
};
| 102 | |
| 103 | static void __i915_vma_resource_unhold(struct i915_vma_resource *vma_res) |
| 104 | { |
| 105 | struct i915_address_space *vm; |
| 106 | |
| 107 | if (!refcount_dec_and_test(r: &vma_res->hold_count)) |
| 108 | return; |
| 109 | |
| 110 | dma_fence_signal(fence: &vma_res->unbind_fence); |
| 111 | |
| 112 | vm = vma_res->vm; |
| 113 | if (vma_res->wakeref) |
| 114 | intel_runtime_pm_put(rpm: &vm->i915->runtime_pm, wref: vma_res->wakeref); |
| 115 | |
| 116 | vma_res->vm = NULL; |
| 117 | if (!RB_EMPTY_NODE(&vma_res->rb)) { |
| 118 | mutex_lock(&vm->mutex); |
| 119 | vma_res_itree_remove(node: vma_res, root: &vm->pending_unbind); |
| 120 | mutex_unlock(lock: &vm->mutex); |
| 121 | } |
| 122 | |
| 123 | if (vma_res->bi.pages_rsgt) |
| 124 | i915_refct_sgt_put(rsgt: vma_res->bi.pages_rsgt); |
| 125 | } |
| 126 | |
| 127 | /** |
| 128 | * i915_vma_resource_unhold - Unhold the signaling of the vma resource unbind |
| 129 | * fence. |
| 130 | * @vma_res: The vma resource. |
| 131 | * @lockdep_cookie: The lockdep cookie returned from i915_vma_resource_hold. |
| 132 | * |
| 133 | * The function may leave a dma_fence critical section. |
| 134 | */ |
| 135 | void i915_vma_resource_unhold(struct i915_vma_resource *vma_res, |
| 136 | bool lockdep_cookie) |
| 137 | { |
| 138 | dma_fence_end_signalling(cookie: lockdep_cookie); |
| 139 | |
| 140 | if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { |
| 141 | unsigned long irq_flags; |
| 142 | |
| 143 | /* Inefficient open-coded might_lock_irqsave() */ |
| 144 | spin_lock_irqsave(&vma_res->lock, irq_flags); |
| 145 | spin_unlock_irqrestore(lock: &vma_res->lock, flags: irq_flags); |
| 146 | } |
| 147 | |
| 148 | __i915_vma_resource_unhold(vma_res); |
| 149 | } |
| 150 | |
| 151 | /** |
| 152 | * i915_vma_resource_hold - Hold the signaling of the vma resource unbind fence. |
| 153 | * @vma_res: The vma resource. |
| 154 | * @lockdep_cookie: Pointer to a bool serving as a lockdep cooke that should |
| 155 | * be given as an argument to the pairing i915_vma_resource_unhold. |
| 156 | * |
| 157 | * If returning true, the function enters a dma_fence signalling critical |
| 158 | * section if not in one already. |
| 159 | * |
| 160 | * Return: true if holding successful, false if not. |
| 161 | */ |
| 162 | bool i915_vma_resource_hold(struct i915_vma_resource *vma_res, |
| 163 | bool *lockdep_cookie) |
| 164 | { |
| 165 | bool held = refcount_inc_not_zero(r: &vma_res->hold_count); |
| 166 | |
| 167 | if (held) |
| 168 | *lockdep_cookie = dma_fence_begin_signalling(); |
| 169 | |
| 170 | return held; |
| 171 | } |
| 172 | |
| 173 | static void i915_vma_resource_unbind_work(struct work_struct *work) |
| 174 | { |
| 175 | struct i915_vma_resource *vma_res = |
| 176 | container_of(work, typeof(*vma_res), work); |
| 177 | struct i915_address_space *vm = vma_res->vm; |
| 178 | bool lockdep_cookie; |
| 179 | |
| 180 | lockdep_cookie = dma_fence_begin_signalling(); |
| 181 | if (likely(!vma_res->skip_pte_rewrite)) |
| 182 | vma_res->ops->unbind_vma(vm, vma_res); |
| 183 | |
| 184 | dma_fence_end_signalling(cookie: lockdep_cookie); |
| 185 | __i915_vma_resource_unhold(vma_res); |
| 186 | i915_vma_resource_put(vma_res); |
| 187 | } |
| 188 | |
| 189 | static int |
| 190 | i915_vma_resource_fence_notify(struct i915_sw_fence *fence, |
| 191 | enum i915_sw_fence_notify state) |
| 192 | { |
| 193 | struct i915_vma_resource *vma_res = |
| 194 | container_of(fence, typeof(*vma_res), chain); |
| 195 | struct dma_fence *unbind_fence = |
| 196 | &vma_res->unbind_fence; |
| 197 | |
| 198 | switch (state) { |
| 199 | case FENCE_COMPLETE: |
| 200 | dma_fence_get(fence: unbind_fence); |
| 201 | if (vma_res->immediate_unbind) { |
| 202 | i915_vma_resource_unbind_work(work: &vma_res->work); |
| 203 | } else { |
| 204 | INIT_WORK(&vma_res->work, i915_vma_resource_unbind_work); |
| 205 | queue_work(wq: system_unbound_wq, work: &vma_res->work); |
| 206 | } |
| 207 | break; |
| 208 | case FENCE_FREE: |
| 209 | i915_vma_resource_put(vma_res); |
| 210 | break; |
| 211 | } |
| 212 | |
| 213 | return NOTIFY_DONE; |
| 214 | } |
| 215 | |
| 216 | /** |
| 217 | * i915_vma_resource_unbind - Unbind a vma resource |
| 218 | * @vma_res: The vma resource to unbind. |
| 219 | * @tlb: pointer to vma->obj->mm.tlb associated with the resource |
| 220 | * to be stored at vma_res->tlb. When not-NULL, it will be used |
| 221 | * to do TLB cache invalidation before freeing a VMA resource. |
| 222 | * Used only for async unbind. |
| 223 | * |
| 224 | * At this point this function does little more than publish a fence that |
| 225 | * signals immediately unless signaling is held back. |
| 226 | * |
| 227 | * Return: A refcounted pointer to a dma-fence that signals when unbinding is |
| 228 | * complete. |
| 229 | */ |
| 230 | struct dma_fence *i915_vma_resource_unbind(struct i915_vma_resource *vma_res, |
| 231 | u32 *tlb) |
| 232 | { |
| 233 | struct i915_address_space *vm = vma_res->vm; |
| 234 | |
| 235 | vma_res->tlb = tlb; |
| 236 | |
| 237 | /* Reference for the sw fence */ |
| 238 | i915_vma_resource_get(vma_res); |
| 239 | |
| 240 | /* Caller must already have a wakeref in this case. */ |
| 241 | if (vma_res->needs_wakeref) |
| 242 | vma_res->wakeref = intel_runtime_pm_get_if_in_use(rpm: &vm->i915->runtime_pm); |
| 243 | |
| 244 | if (atomic_read(v: &vma_res->chain.pending) <= 1) { |
| 245 | RB_CLEAR_NODE(&vma_res->rb); |
| 246 | vma_res->immediate_unbind = 1; |
| 247 | } else { |
| 248 | vma_res_itree_insert(node: vma_res, root: &vma_res->vm->pending_unbind); |
| 249 | } |
| 250 | |
| 251 | i915_sw_fence_commit(fence: &vma_res->chain); |
| 252 | |
| 253 | return &vma_res->unbind_fence; |
| 254 | } |
| 255 | |
| 256 | /** |
| 257 | * __i915_vma_resource_init - Initialize a vma resource. |
| 258 | * @vma_res: The vma resource to initialize |
| 259 | * |
| 260 | * Initializes the private members of a vma resource. |
| 261 | */ |
| 262 | void __i915_vma_resource_init(struct i915_vma_resource *vma_res) |
| 263 | { |
| 264 | spin_lock_init(&vma_res->lock); |
| 265 | dma_fence_init(fence: &vma_res->unbind_fence, ops: &unbind_fence_ops, |
| 266 | lock: &vma_res->lock, context: 0, seqno: 0); |
| 267 | refcount_set(r: &vma_res->hold_count, n: 1); |
| 268 | i915_sw_fence_init(&vma_res->chain, i915_vma_resource_fence_notify); |
| 269 | } |
| 270 | |
| 271 | static void |
| 272 | i915_vma_resource_color_adjust_range(struct i915_address_space *vm, |
| 273 | u64 *start, |
| 274 | u64 *end) |
| 275 | { |
| 276 | if (i915_vm_has_cache_coloring(vm)) { |
| 277 | if (*start) |
| 278 | *start -= I915_GTT_PAGE_SIZE; |
| 279 | *end += I915_GTT_PAGE_SIZE; |
| 280 | } |
| 281 | } |
| 282 | |
| 283 | /** |
| 284 | * i915_vma_resource_bind_dep_sync - Wait for / sync all unbinds touching a |
| 285 | * certain vm range. |
| 286 | * @vm: The vm to look at. |
| 287 | * @offset: The range start. |
| 288 | * @size: The range size. |
| 289 | * @intr: Whether to wait interrubtible. |
| 290 | * |
| 291 | * The function needs to be called with the vm lock held. |
| 292 | * |
| 293 | * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true |
| 294 | */ |
| 295 | int i915_vma_resource_bind_dep_sync(struct i915_address_space *vm, |
| 296 | u64 offset, |
| 297 | u64 size, |
| 298 | bool intr) |
| 299 | { |
| 300 | struct i915_vma_resource *node; |
| 301 | u64 last = offset + size - 1; |
| 302 | |
| 303 | lockdep_assert_held(&vm->mutex); |
| 304 | might_sleep(); |
| 305 | |
| 306 | i915_vma_resource_color_adjust_range(vm, start: &offset, end: &last); |
| 307 | node = vma_res_itree_iter_first(root: &vm->pending_unbind, start: offset, last); |
| 308 | while (node) { |
| 309 | int ret = dma_fence_wait(fence: &node->unbind_fence, intr); |
| 310 | |
| 311 | if (ret) |
| 312 | return ret; |
| 313 | |
| 314 | node = vma_res_itree_iter_next(node, start: offset, last); |
| 315 | } |
| 316 | |
| 317 | return 0; |
| 318 | } |
| 319 | |
| 320 | /** |
| 321 | * i915_vma_resource_bind_dep_sync_all - Wait for / sync all unbinds of a vm, |
| 322 | * releasing the vm lock while waiting. |
| 323 | * @vm: The vm to look at. |
| 324 | * |
| 325 | * The function may not be called with the vm lock held. |
| 326 | * Typically this is called at vm destruction to finish any pending |
| 327 | * unbind operations. The vm mutex is released while waiting to avoid |
| 328 | * stalling kernel workqueues trying to grab the mutex. |
| 329 | */ |
| 330 | void i915_vma_resource_bind_dep_sync_all(struct i915_address_space *vm) |
| 331 | { |
| 332 | struct i915_vma_resource *node; |
| 333 | struct dma_fence *fence; |
| 334 | |
| 335 | do { |
| 336 | fence = NULL; |
| 337 | mutex_lock(&vm->mutex); |
| 338 | node = vma_res_itree_iter_first(root: &vm->pending_unbind, start: 0, |
| 339 | U64_MAX); |
| 340 | if (node) |
| 341 | fence = dma_fence_get_rcu(fence: &node->unbind_fence); |
| 342 | mutex_unlock(lock: &vm->mutex); |
| 343 | |
| 344 | if (fence) { |
| 345 | /* |
| 346 | * The wait makes sure the node eventually removes |
| 347 | * itself from the tree. |
| 348 | */ |
| 349 | dma_fence_wait(fence, intr: false); |
| 350 | dma_fence_put(fence); |
| 351 | } |
| 352 | } while (node); |
| 353 | } |
| 354 | |
| 355 | /** |
| 356 | * i915_vma_resource_bind_dep_await - Have a struct i915_sw_fence await all |
| 357 | * pending unbinds in a certain range of a vm. |
| 358 | * @vm: The vm to look at. |
| 359 | * @sw_fence: The struct i915_sw_fence that will be awaiting the unbinds. |
| 360 | * @offset: The range start. |
| 361 | * @size: The range size. |
| 362 | * @intr: Whether to wait interrubtible. |
| 363 | * @gfp: Allocation mode for memory allocations. |
| 364 | * |
| 365 | * The function makes @sw_fence await all pending unbinds in a certain |
| 366 | * vm range before calling the complete notifier. To be able to await |
| 367 | * each individual unbind, the function needs to allocate memory using |
| 368 | * the @gpf allocation mode. If that fails, the function will instead |
| 369 | * wait for the unbind fence to signal, using @intr to judge whether to |
| 370 | * wait interruptible or not. Note that @gfp should ideally be selected so |
| 371 | * as to avoid any expensive memory allocation stalls and rather fail and |
| 372 | * synchronize itself. For now the vm mutex is required when calling this |
| 373 | * function with means that @gfp can't call into direct reclaim. In reality |
| 374 | * this means that during heavy memory pressure, we will sync in this |
| 375 | * function. |
| 376 | * |
| 377 | * Return: Zero on success, -ERESTARTSYS if interrupted and @intr==true |
| 378 | */ |
| 379 | int i915_vma_resource_bind_dep_await(struct i915_address_space *vm, |
| 380 | struct i915_sw_fence *sw_fence, |
| 381 | u64 offset, |
| 382 | u64 size, |
| 383 | bool intr, |
| 384 | gfp_t gfp) |
| 385 | { |
| 386 | struct i915_vma_resource *node; |
| 387 | u64 last = offset + size - 1; |
| 388 | |
| 389 | lockdep_assert_held(&vm->mutex); |
| 390 | might_alloc(gfp_mask: gfp); |
| 391 | might_sleep(); |
| 392 | |
| 393 | i915_vma_resource_color_adjust_range(vm, start: &offset, end: &last); |
| 394 | node = vma_res_itree_iter_first(root: &vm->pending_unbind, start: offset, last); |
| 395 | while (node) { |
| 396 | int ret; |
| 397 | |
| 398 | ret = i915_sw_fence_await_dma_fence(fence: sw_fence, |
| 399 | dma: &node->unbind_fence, |
| 400 | timeout: 0, gfp); |
| 401 | if (ret < 0) { |
| 402 | ret = dma_fence_wait(fence: &node->unbind_fence, intr); |
| 403 | if (ret) |
| 404 | return ret; |
| 405 | } |
| 406 | |
| 407 | node = vma_res_itree_iter_next(node, start: offset, last); |
| 408 | } |
| 409 | |
| 410 | return 0; |
| 411 | } |
| 412 | |
| 413 | void i915_vma_resource_module_exit(void) |
| 414 | { |
| 415 | kmem_cache_destroy(s: slab_vma_resources); |
| 416 | } |
| 417 | |
| 418 | int __init i915_vma_resource_module_init(void) |
| 419 | { |
| 420 | slab_vma_resources = KMEM_CACHE(i915_vma_resource, SLAB_HWCACHE_ALIGN); |
| 421 | if (!slab_vma_resources) |
| 422 | return -ENOMEM; |
| 423 | |
| 424 | return 0; |
| 425 | } |
| 426 | |