/* SPDX-License-Identifier: MIT */
/*
 * Copyright © 2016 Intel Corporation
 */

#ifndef __I915_GEM_OBJECT_TYPES_H__
#define __I915_GEM_OBJECT_TYPES_H__

#include <linux/mmu_notifier.h>

#include <drm/drm_gem.h>
#include <drm/ttm/ttm_bo.h>
#include <uapi/drm/i915_drm.h>

#include "i915_active.h"
#include "i915_selftest.h"
#include "i915_vma_resource.h"

#include "gt/intel_gt_defines.h"

struct drm_i915_gem_object;
struct intel_frontbuffer;
struct intel_memory_region;

/*
 * struct i915_lut_handle tracks the fast lookups from handle to vma used
 * for execbuf. Although we use a radixtree for that mapping, in order to
 * remove them as the object or context is closed, we need a secondary list
 * and a translation entry (i915_lut_handle).
 */
struct i915_lut_handle {
	struct list_head obj_link;
	struct i915_gem_context *ctx;
	u32 handle;
};
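
/*
 * A rough sketch of the teardown side described above (illustrative only;
 * the real close path also takes lut_lock and the context mutex): closing
 * an object walks its lut_list to find every context still indexing it,
 * and drops the matching radix tree entries:
 *
 *	struct i915_lut_handle *lut, *ln;
 *
 *	list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link)
 *		radix_tree_delete(&lut->ctx->handles_vma, lut->handle);
 */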

struct drm_i915_gem_object_ops {
	unsigned int flags;
#define I915_GEM_OBJECT_IS_SHRINKABLE			BIT(1)
/* Skip the shrinker management in set_pages/unset_pages */
#define I915_GEM_OBJECT_SELF_MANAGED_SHRINK_LIST	BIT(2)
#define I915_GEM_OBJECT_IS_PROXY			BIT(3)
#define I915_GEM_OBJECT_NO_MMAP				BIT(4)

	/* Interface between the GEM object and its backing storage.
	 * get_pages() is called once prior to the use of the associated set
	 * of pages, before binding them into the GTT, and put_pages() is
	 * called after we no longer need them. As we expect there to be an
	 * associated cost with migrating pages between the backing storage
	 * and making them available for the GPU (e.g. clflush), we may hold
	 * onto the pages after they are no longer referenced by the GPU
	 * in case they may be used again shortly (for example migrating the
	 * pages to a different memory domain within the GTT). put_pages()
	 * will therefore most likely be called when the object itself is
	 * being released or under memory pressure (where we attempt to
	 * reap pages for the shrinker).
	 */
	int (*get_pages)(struct drm_i915_gem_object *obj);
	void (*put_pages)(struct drm_i915_gem_object *obj,
			  struct sg_table *pages);
	int (*truncate)(struct drm_i915_gem_object *obj);
	/**
	 * shrink - Perform further backend specific actions to facilitate
	 * shrinking.
	 * @obj: The gem object
	 * @flags: Extra flags to control shrinking behaviour in the backend
	 *
	 * Possible values for @flags:
	 *
	 * I915_GEM_OBJECT_SHRINK_WRITEBACK - Try to perform writeback of the
	 * backing pages, if supported.
	 *
	 * I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT - Don't wait for the object to
	 * idle. Active objects can be considered later. The TTM backend for
	 * example might have async migrations going on, which don't use any
	 * i915_vma to track the active GTT binding, and hence having an unbound
	 * object might not be enough.
	 */
#define I915_GEM_OBJECT_SHRINK_WRITEBACK	BIT(0)
#define I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT	BIT(1)
	int (*shrink)(struct drm_i915_gem_object *obj, unsigned int flags);
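
	/*
	 * Illustrative only, not part of the interface: a shrinker pass that
	 * must not stall on the GPU but still wants pages written back might
	 * combine the flags like so (hypothetical call site):
	 *
	 *	err = obj->ops->shrink(obj,
	 *			       I915_GEM_OBJECT_SHRINK_WRITEBACK |
	 *			       I915_GEM_OBJECT_SHRINK_NO_GPU_WAIT);
	 */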

	int (*pread)(struct drm_i915_gem_object *obj,
		     const struct drm_i915_gem_pread *arg);
	int (*pwrite)(struct drm_i915_gem_object *obj,
		      const struct drm_i915_gem_pwrite *arg);
	u64 (*mmap_offset)(struct drm_i915_gem_object *obj);
	void (*unmap_virtual)(struct drm_i915_gem_object *obj);

	int (*dmabuf_export)(struct drm_i915_gem_object *obj);

	/**
	 * adjust_lru - notify that the madvise value was updated
	 * @obj: The gem object
	 *
	 * The madvise value may have been updated, or the object may have
	 * been recently referenced, so act accordingly (perhaps changing an
	 * LRU list, etc.).
	 */
	void (*adjust_lru)(struct drm_i915_gem_object *obj);

	/**
	 * delayed_free - Override the default delayed free implementation
	 */
	void (*delayed_free)(struct drm_i915_gem_object *obj);

	/**
	 * migrate - Migrate object to a different region either for
	 * pinning or for as long as the object lock is held.
	 */
	int (*migrate)(struct drm_i915_gem_object *obj,
		       struct intel_memory_region *mr,
		       unsigned int flags);

	void (*release)(struct drm_i915_gem_object *obj);

	const struct vm_operations_struct *mmap_ops;
	const char *name; /* friendly name for debug, e.g. lockdep classes */
};
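
/*
 * A minimal sketch of how a backend might fill in the ops table; the names
 * my_get_pages/my_put_pages are hypothetical, not helpers defined by the
 * driver:
 *
 *	static const struct drm_i915_gem_object_ops my_ops = {
 *		.name = "my-backend",
 *		.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
 *		.get_pages = my_get_pages,
 *		.put_pages = my_put_pages,
 *	};
 *
 * The .name doubles as the lockdep class name for the object lock.
 */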

/**
 * enum i915_cache_level - The supported GTT caching values for system memory
 * pages.
 *
 * These translate to some special GTT PTE bits when binding pages into some
 * address space. It also determines whether an object, or rather its pages,
 * are coherent with the GPU, when also reading or writing through the CPU
 * cache with those pages.
 *
 * Userspace can also control this through struct drm_i915_gem_caching.
 */
enum i915_cache_level {
	/**
	 * @I915_CACHE_NONE:
	 *
	 * GPU access is not coherent with the CPU cache. If the cache is dirty
	 * and we need the underlying pages to be coherent with some later GPU
	 * access then we need to manually flush the pages.
	 *
	 * On shared LLC platforms reads and writes through the CPU cache are
	 * still coherent even with this setting. See also
	 * &drm_i915_gem_object.cache_coherent for more details. Due to this we
	 * should only ever use uncached for scanout surfaces, otherwise we end
	 * up over-flushing in some places.
	 *
	 * This is the default on non-LLC platforms.
	 */
	I915_CACHE_NONE = 0,
	/**
	 * @I915_CACHE_LLC:
	 *
	 * GPU access is coherent with the CPU cache. If the cache is dirty,
	 * then the GPU will ensure that access remains coherent, when both
	 * reading and writing through the CPU cache. GPU writes can dirty the
	 * CPU cache.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Applies to both platforms with a shared LLC (HAS_LLC), and
	 * snooping-based platforms (HAS_SNOOP).
	 *
	 * This is the default on shared LLC platforms. The only exception is
	 * scanout objects, where the display engine is not coherent with the
	 * CPU cache. For such objects I915_CACHE_NONE or I915_CACHE_WT is
	 * automatically applied by the kernel in pin_for_display, if userspace
	 * has not done so already.
	 */
	I915_CACHE_LLC,
	/**
	 * @I915_CACHE_L3_LLC:
	 *
	 * Explicitly enable the Gfx L3 cache, with coherent LLC.
	 *
	 * The Gfx L3 sits between the domain specific caches, e.g.
	 * sampler/render caches, and the larger LLC. LLC is coherent with the
	 * GPU, but L3 is only visible to the GPU, so likely needs to be flushed
	 * when the workload completes.
	 *
	 * Not used for scanout surfaces.
	 *
	 * Only exposed on some gen7 + GGTT. More recent hardware has dropped
	 * this explicit setting, where it should now be enabled by default.
	 */
	I915_CACHE_L3_LLC,
	/**
	 * @I915_CACHE_WT:
	 *
	 * Write-through. Used for scanout surfaces.
	 *
	 * The GPU can utilise the caches, while still having the display engine
	 * be coherent with GPU writes, as a result we don't need to flush the
	 * CPU caches when moving out of the render domain. This is the default
	 * setting chosen by the kernel, if supported by the HW, otherwise we
	 * fall back to I915_CACHE_NONE. On the CPU side writes through the CPU
	 * cache still need to be flushed, to remain coherent with the display
	 * engine.
	 */
	I915_CACHE_WT,
	/**
	 * @I915_MAX_CACHE_LEVEL:
	 *
	 * Mark the last entry in the enum. Used for defining the
	 * cachelevel_to_pat array, the cache_level to PAT index translation
	 * table.
	 */
	I915_MAX_CACHE_LEVEL,
};

enum i915_map_type {
	I915_MAP_WB = 0,
	I915_MAP_WC,
#define I915_MAP_OVERRIDE BIT(31)
	I915_MAP_FORCE_WB = I915_MAP_WB | I915_MAP_OVERRIDE,
	I915_MAP_FORCE_WC = I915_MAP_WC | I915_MAP_OVERRIDE,
};
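
/*
 * For illustration: the FORCE variants only add the override bit on top of
 * the base mapping type, so a sketch of recovering the underlying type
 * (assuming a caller holding an i915_map_type value named "type") is:
 *
 *	enum i915_map_type base = type & ~I915_MAP_OVERRIDE;
 *
 * which leaves plain I915_MAP_WB or I915_MAP_WC.
 */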

enum i915_mmap_type {
	I915_MMAP_TYPE_GTT = 0,
	I915_MMAP_TYPE_WC,
	I915_MMAP_TYPE_WB,
	I915_MMAP_TYPE_UC,
	I915_MMAP_TYPE_FIXED,
};

struct i915_mmap_offset {
	struct drm_vma_offset_node vma_node;
	struct drm_i915_gem_object *obj;
	enum i915_mmap_type mmap_type;

	struct rb_node offset;
};

struct i915_gem_object_page_iter {
	struct scatterlist *sg_pos;
	unsigned int sg_idx; /* in pages, but 32bit eek! */

	struct radix_tree_root radix;
	struct mutex lock; /* protects this cache */
};

struct drm_i915_gem_object {
	/*
	 * We might have reason to revisit the below since it wastes
	 * a lot of space for non-ttm gem objects.
	 * In any case, always use the accessors for the ttm_buffer_object
	 * when accessing it.
	 */
	union {
		struct drm_gem_object base;
		struct ttm_buffer_object __do_not_access;
	};

	const struct drm_i915_gem_object_ops *ops;

	struct {
		/**
		 * @vma.lock: protect the list/tree of vmas
		 */
		spinlock_t lock;

		/**
		 * @vma.list: List of VMAs backed by this object
		 *
		 * The VMAs on this list are ordered by type: all GGTT VMAs
		 * are placed at the head and all ppGTT VMAs at the tail. The
		 * different types of GGTT VMA are unordered between
		 * themselves; use the @vma.tree (which has a defined order
		 * between all VMAs) to quickly find an exact match.
		 */
		struct list_head list;

		/**
		 * @vma.tree: Ordered tree of VMAs backed by this object
		 *
		 * All VMAs created for this object are placed in the @vma.tree
		 * for fast retrieval via a binary search in
		 * i915_vma_instance(). They are also added to @vma.list for
		 * easy iteration.
		 */
		struct rb_root tree;
	} vma;

	/**
	 * @lut_list: List of vma lookup entries in use for this object.
	 *
	 * If this object is closed, we need to remove all of its VMAs from
	 * the fast lookup index in associated contexts; @lut_list provides
	 * this translation from object to context->handles_vma.
	 */
	struct list_head lut_list;
	spinlock_t lut_lock; /* guards lut_list */

	/**
	 * @obj_link: Link into @i915_gem_ww_ctx.obj_list
	 *
	 * When we lock this object through i915_gem_object_lock() with a
	 * context, we add it to the list to ensure we can unlock everything
	 * when i915_gem_ww_ctx_backoff() or i915_gem_ww_ctx_fini() are called.
	 */
	struct list_head obj_link;
	/**
	 * @shares_resv_from: The object shares the resv from this vm.
	 */
	struct i915_address_space *shares_resv_from;

#ifdef CONFIG_PROC_FS
	/**
	 * @client: @i915_drm_client which created the object
	 */
	struct i915_drm_client *client;

	/**
	 * @client_link: Link into @i915_drm_client.objects_list
	 */
	struct list_head client_link;
#endif

	union {
		struct rcu_head rcu;
		struct llist_node freed;
	};

	/**
	 * Whether the object is currently in the GGTT or any other supported
	 * fake offset mmap backed by lmem.
	 */
	unsigned int userfault_count;
	struct list_head userfault_link;

	struct {
		spinlock_t lock; /* Protects access to mmo offsets */
		struct rb_root offsets;
	} mmo;

	I915_SELFTEST_DECLARE(struct list_head st_link);

	unsigned long flags;
#define I915_BO_ALLOC_CONTIGUOUS  BIT(0)
#define I915_BO_ALLOC_VOLATILE    BIT(1)
#define I915_BO_ALLOC_CPU_CLEAR   BIT(2)
#define I915_BO_ALLOC_USER        BIT(3)
/* Object is allowed to lose its contents on suspend / resume, even if pinned */
#define I915_BO_ALLOC_PM_VOLATILE BIT(4)
/* Object needs to be restored early using memcpy during resume */
#define I915_BO_ALLOC_PM_EARLY    BIT(5)
/*
 * Object is likely never accessed by the CPU. This will prioritise the BO to be
 * allocated in the non-mappable portion of lmem. This is merely a hint, and if
 * dealing with userspace objects the CPU fault handler is free to ignore this.
 */
#define I915_BO_ALLOC_GPU_ONLY    BIT(6)
#define I915_BO_ALLOC_CCS_AUX     BIT(7)
/*
 * Object is allowed to retain its initial data and will not be cleared on first
 * access if used along with I915_BO_ALLOC_USER. This is mainly to keep
 * preallocated framebuffer data intact while transitioning it to i915drmfb.
 */
#define I915_BO_PREALLOC          BIT(8)
#define I915_BO_ALLOC_FLAGS (I915_BO_ALLOC_CONTIGUOUS | \
			     I915_BO_ALLOC_VOLATILE | \
			     I915_BO_ALLOC_CPU_CLEAR | \
			     I915_BO_ALLOC_USER | \
			     I915_BO_ALLOC_PM_VOLATILE | \
			     I915_BO_ALLOC_PM_EARLY | \
			     I915_BO_ALLOC_GPU_ONLY | \
			     I915_BO_ALLOC_CCS_AUX | \
			     I915_BO_PREALLOC)
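/*
 * Illustrative only: a kernel-internal buffer the CPU never touches might,
 * for example, be created in lmem with something like
 *
 *	obj = i915_gem_object_create_lmem(i915, size,
 *					  I915_BO_ALLOC_CONTIGUOUS |
 *					  I915_BO_ALLOC_GPU_ONLY);
 *
 * (a sketch, assuming the lmem backend; which flags make sense depends on
 * the region and the caller.)
 */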
#define I915_BO_READONLY          BIT(9)
#define I915_TILING_QUIRK_BIT     10 /* unknown swizzling; do not release! */
#define I915_BO_PROTECTED         BIT(11)
	/**
	 * @mem_flags - Mutable placement-related flags
	 *
	 * These are flags that indicate specifics of the memory region
	 * the object is currently in. As such they are only stable
	 * either under the object lock or if the object is pinned.
	 */
	unsigned int mem_flags;
#define I915_BO_FLAG_STRUCT_PAGE BIT(0) /* Object backed by struct pages */
#define I915_BO_FLAG_IOMEM       BIT(1) /* Object backed by IO memory */
	/**
	 * @pat_index: The desired PAT index.
	 *
	 * See hardware specification for valid PAT indices for each platform.
	 * This field replaces the @cache_level that contains a value of enum
	 * i915_cache_level since PAT indices are being used by both userspace
	 * and the kernel mode driver for caching policy control after GEN12.
	 * In the meantime platform specific tables are created to translate
	 * i915_cache_level into a pat index; for more details check the macros
	 * defined in i915/i915_pci.c, e.g. TGL_CACHELEVEL.
	 * For backward compatibility, this field contains values that exactly
	 * match the entries of enum i915_cache_level for pre-GEN12 platforms
	 * (See LEGACY_CACHELEVEL), so that the PTE encode functions for these
	 * legacy platforms can stay the same.
	 */
	unsigned int pat_index:6;
	/**
	 * @pat_set_by_user: Indicate whether pat_index is set by user space
	 *
	 * This field is set to false by default, only set to true if the
	 * pat_index is set by user space. By design, user space is capable of
	 * managing caching behavior by setting pat_index, in which case the
	 * kernel mode driver should never touch the pat_index.
	 */
	unsigned int pat_set_by_user:1;
	/**
	 * @cache_coherent:
	 *
	 * Note: with the change above which replaced @cache_level with
	 * pat_index, the use of @cache_coherent is limited to objects created
	 * by the kernel or by userspace without a pat index specified. Check
	 * @pat_set_by_user to find out if an object has its pat index set by
	 * userspace. The ioctls to change cache settings have also been
	 * disabled for objects with a pat index set by userspace. Please
	 * don't assume @cache_coherent has the flags set as described here.
	 * A helper function, i915_gem_object_has_cache_level(), provides one
	 * way to bypass the use of this field.
	 *
	 * Track whether the pages are coherent with the GPU if reading or
	 * writing through the CPU caches. This largely depends on the
	 * @cache_level setting.
	 *
	 * On platforms which don't have the shared LLC (HAS_SNOOP), like on
	 * Atom platforms, coherency must be explicitly requested with some
	 * special GTT caching bits (see enum i915_cache_level). Enabling
	 * coherency does come at a performance and power cost on such
	 * platforms. On the flip side the kernel does not need to manually
	 * flush any buffers which need to be coherent with the GPU, if the
	 * object is not coherent, i.e. @cache_coherent is zero.
	 *
	 * On platforms that share the LLC with the CPU (HAS_LLC), all GT
	 * memory access will automatically snoop the CPU caches (even with
	 * CACHE_NONE). The one exception is when dealing with the display
	 * engine, like with scanout surfaces. To handle this the kernel will
	 * always flush the surface out of the CPU caches when preparing it
	 * for scanout. Also note that since scanout surfaces are only ever
	 * read by the display engine we only need to care about flushing any
	 * writes through the CPU cache, reads on the other hand will always
	 * be coherent.
	 *
	 * Something strange here is why @cache_coherent is not a simple
	 * boolean, i.e. coherent vs non-coherent. The reasoning for this is
	 * back to the display engine not being fully coherent. As a result
	 * scanout surfaces will either be marked as I915_CACHE_NONE or
	 * I915_CACHE_WT. In the case of seeing I915_CACHE_NONE the kernel
	 * makes the assumption that this is likely a scanout surface, and
	 * will set @cache_coherent as only I915_BO_CACHE_COHERENT_FOR_READ,
	 * on platforms with the shared LLC. The kernel uses this to always
	 * flush writes through the CPU cache as early as possible, where it
	 * can, in effect keeping @cache_dirty clean, so we can potentially
	 * avoid stalling when flushing the surface just before doing the
	 * scanout. This does mean we might unnecessarily flush non-scanout
	 * objects in some places, but the default assumption is that all
	 * normal objects should be using I915_CACHE_LLC, at least on
	 * platforms with the shared LLC.
	 *
	 * Supported values:
	 *
	 * I915_BO_CACHE_COHERENT_FOR_READ:
	 *
	 * On shared LLC platforms, we use this for special scanout surfaces,
	 * where the display engine is not coherent with the CPU cache. As
	 * such we need to ensure we flush any writes before doing the
	 * scanout. As an optimisation we try to flush any writes as early as
	 * possible to avoid stalling later.
	 *
	 * Thus for scanout surfaces using I915_CACHE_NONE, on shared LLC
	 * platforms, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ
	 *
	 * While for normal objects that are fully coherent, including special
	 * scanout surfaces marked as I915_CACHE_WT, we use:
	 *
	 * cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ |
	 *		    I915_BO_CACHE_COHERENT_FOR_WRITE
	 *
	 * And then for objects that are not coherent at all we use:
	 *
	 * cache_coherent = 0
	 *
	 * I915_BO_CACHE_COHERENT_FOR_WRITE:
	 *
	 * When writing through the CPU cache, the GPU is still coherent. Note
	 * that this also implies I915_BO_CACHE_COHERENT_FOR_READ.
	 */
#define I915_BO_CACHE_COHERENT_FOR_READ BIT(0)
#define I915_BO_CACHE_COHERENT_FOR_WRITE BIT(1)
	unsigned int cache_coherent:2;

	/**
	 * @cache_dirty:
	 *
	 * Note: with the change above which replaced cache_level with
	 * pat_index, the use of @cache_dirty is limited to objects created by
	 * the kernel or by userspace without a pat index specified. Check
	 * @pat_set_by_user to find out if an object has its pat index set by
	 * userspace. The ioctls to change cache settings have also been
	 * disabled for objects with a pat_index set by userspace. Please
	 * don't assume @cache_dirty is set as described here. Also see the
	 * helper function i915_gem_object_has_cache_level() for possible ways
	 * to bypass the use of this field.
	 *
	 * Track if we are dirty with writes through the CPU cache for this
	 * object. As a result reading directly from main memory might yield
	 * stale data.
	 *
	 * This also ties into whether the kernel is tracking the object as
	 * coherent with the GPU, as per @cache_coherent, as it determines if
	 * flushing might be needed at various points.
	 *
	 * Another part of @cache_dirty is managing flushing when first
	 * acquiring the pages for system memory, at this point the pages are
	 * considered foreign, so the default assumption is that the cache is
	 * dirty, for example the page zeroing done by the kernel might leave
	 * writes through the CPU cache, or swapping-in, while the actual data
	 * in main memory is potentially stale. Note that this is a potential
	 * security issue when dealing with userspace objects and zeroing. Now,
	 * whether we actually need to apply the big sledgehammer of flushing
	 * all the pages on acquire depends on if @cache_coherent is marked as
	 * I915_BO_CACHE_COHERENT_FOR_WRITE, i.e. that the GPU will be
	 * coherent for both reads and writes through the CPU cache.
	 *
	 * Note that on shared LLC platforms we still apply the heavy flush for
	 * I915_CACHE_NONE objects, under the assumption that this is going to
	 * be used for scanout.
	 *
	 * Update: On some hardware there is now also the 'Bypass LLC' MOCS
	 * entry, which defeats our @cache_coherent tracking, since userspace
	 * can freely bypass the CPU cache when touching the pages with the
	 * GPU, where the kernel is completely unaware. On such platforms we
	 * need to apply the sledgehammer-on-acquire regardless of the
	 * @cache_coherent.
	 *
	 * Special care is taken on non-LLC platforms, to prevent potential
	 * information leak. The driver currently ensures:
	 *
	 *   1. All userspace objects, by default, have @cache_level set as
	 *   I915_CACHE_NONE. The only exception is userptr objects, where we
	 *   instead force I915_CACHE_LLC, but we also don't allow userspace
	 *   to ever change the @cache_level for such objects. Another special
	 *   case is dma-buf, which doesn't rely on @cache_dirty, but there we
	 *   always do a forced flush when acquiring the pages, if there is a
	 *   chance that the pages can be read directly from main memory with
	 *   the GPU.
	 *
	 *   2. All I915_CACHE_NONE objects have @cache_dirty initially true.
	 *
	 *   3. All swapped-out objects (i.e. shmem) have @cache_dirty set to
	 *   true.
	 *
	 *   4. The @cache_dirty is never freely reset before the initial
	 *   flush, even if userspace adjusts the @cache_level through the
	 *   i915_gem_set_caching_ioctl.
	 *
	 *   5. All @cache_dirty objects (including swapped-in) are initially
	 *   flushed with a synchronous call to drm_clflush_sg in
	 *   __i915_gem_object_set_pages. The @cache_dirty can be freely reset
	 *   at this point. All further asynchronous clflushes are never
	 *   security critical, i.e. userspace is free to race against itself.
	 */
	unsigned int cache_dirty:1;

	/* @is_dpt: Object houses a display page table (DPT) */
	unsigned int is_dpt:1;

	/**
	 * @read_domains: Read memory domains.
	 *
	 * These monitor which caches contain read/write data related to the
	 * object. When transitioning from one set of domains to another,
	 * the driver is called to ensure that caches are suitably flushed and
	 * invalidated.
	 */
	u16 read_domains;

	/**
	 * @write_domain: Corresponding unique write memory domain.
	 */
	u16 write_domain;

	struct intel_frontbuffer __rcu *frontbuffer;

	/** Current tiling stride for the object, if it's tiled. */
	unsigned int tiling_and_stride;
#define FENCE_MINIMUM_STRIDE 128 /* See i915_tiling_ok() */
#define TILING_MASK (FENCE_MINIMUM_STRIDE - 1)
#define STRIDE_MASK (~TILING_MASK)
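
	/*
	 * Both values are packed into the one field. A minimal sketch of
	 * unpacking them, given the masks above (illustrative, not a helper
	 * defined here):
	 *
	 *	unsigned int tiling = obj->tiling_and_stride & TILING_MASK;
	 *	unsigned int stride = obj->tiling_and_stride & STRIDE_MASK;
	 *
	 * This works because a fenceable stride is always a multiple of
	 * FENCE_MINIMUM_STRIDE, leaving the low bits free for the tiling
	 * mode.
	 */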

	struct {
		/*
		 * Protects the pages and their use. Do not use directly, but
		 * instead go through the pin/unpin interfaces.
		 */
		atomic_t pages_pin_count;

		/**
		 * @shrink_pin: Prevents the pages from being made visible to
		 * the shrinker, while the shrink_pin is non-zero. Most users
		 * should pretty much never have to care about this, outside of
		 * some special use cases.
		 *
		 * By default most objects will start out as visible to the
		 * shrinker (if I915_GEM_OBJECT_IS_SHRINKABLE) as soon as the
		 * backing pages are attached to the object, like in
		 * __i915_gem_object_set_pages(). They will then be removed
		 * from the shrinker list once the pages are released.
		 *
		 * The @shrink_pin is incremented by calling
		 * i915_gem_object_make_unshrinkable(), which will also remove
		 * the object from the shrinker list, if the pin count was zero.
		 *
		 * Callers will then typically call
		 * i915_gem_object_make_shrinkable() or
		 * i915_gem_object_make_purgeable() to decrement the pin count,
		 * and make the pages visible again.
		 */
		atomic_t shrink_pin;

		/**
		 * @ttm_shrinkable: True when the object is using shmem pages
		 * underneath. Protected by the object lock.
		 */
		bool ttm_shrinkable;

		/**
		 * @unknown_state: Indicate that the object is effectively
		 * borked. This is write-once and set if we somehow encounter a
		 * fatal error when moving/clearing the pages, and we are not
		 * able to fall back to memcpy/memset, like on small-BAR
		 * systems. The GPU should also be wedged (or in the process)
		 * at this point.
		 *
		 * Only valid to read this after acquiring the dma-resv lock and
		 * waiting for all DMA_RESV_USAGE_KERNEL fences to be signalled,
		 * or if we otherwise know that the moving fence has signalled,
		 * and we are certain the pages underneath are valid for
		 * immediate access (under normal operation), like just prior to
		 * binding the object or when setting up the CPU fault handler.
		 * See i915_gem_object_has_unknown_state().
		 */
		bool unknown_state;

		/**
		 * Priority list of potential placements for this object.
		 */
		struct intel_memory_region **placements;
		int n_placements;

		/**
		 * Memory region for this object.
		 */
		struct intel_memory_region *region;

		/**
		 * Memory manager resource allocated for this object. Only
		 * needed for the mock region.
		 */
		struct ttm_resource *res;

		/**
		 * Element within memory_region->objects or region->purgeable
		 * if the object is marked as DONTNEED. Access is protected by
		 * region->obj_lock.
		 */
		struct list_head region_link;

		struct i915_refct_sgt *rsgt;
		struct sg_table *pages;
		void *mapping;

		struct i915_page_sizes page_sizes;

		I915_SELFTEST_DECLARE(unsigned int page_mask);

		struct i915_gem_object_page_iter get_page;
		struct i915_gem_object_page_iter get_dma_page;

		/**
		 * Element within i915->mm.shrink_list or i915->mm.purge_list,
		 * locked by i915->mm.obj_lock.
		 */
		struct list_head link;

		/**
		 * Advice: are the backing pages purgeable?
		 */
		unsigned int madv:2;

		/**
		 * This is set if the object has been written to since the
		 * pages were last acquired.
		 */
		bool dirty:1;

		u32 tlb[I915_MAX_GT];
	} mm;

	struct {
		struct i915_refct_sgt *cached_io_rsgt;
		struct i915_gem_object_page_iter get_io_page;
		struct drm_i915_gem_object *backup;
		bool created:1;
	} ttm;

	/*
	 * Record which PXP key instance this object was created against (if
	 * any), so we can use it to determine if the encryption is valid by
	 * comparing against the current key instance.
	 */
	u32 pxp_key_instance;

	/** Record of address bit 17 of each page at last unbind. */
	unsigned long *bit_17;

	union {
#ifdef CONFIG_MMU_NOTIFIER
		struct i915_gem_userptr {
			uintptr_t ptr;
			unsigned long notifier_seq;

			struct mmu_interval_notifier notifier;
			struct page **pvec;
			int page_ref;
		} userptr;
#endif

		struct drm_mm_node *stolen;

		resource_size_t bo_offset;

		unsigned long scratch;
		u64 encode;

		void *gvt_info;
	};
};

#define intel_bo_to_drm_bo(bo) (&(bo)->base)
#define intel_bo_to_i915(bo) to_i915(intel_bo_to_drm_bo(bo)->dev)

static inline struct drm_i915_gem_object *
to_intel_bo(struct drm_gem_object *gem)
{
	/* Assert that to_intel_bo(NULL) == NULL */
	BUILD_BUG_ON(offsetof(struct drm_i915_gem_object, base));

	return container_of(gem, struct drm_i915_gem_object, base);
}
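
/*
 * Typical usage, e.g. from a DRM callback handed the base object (a sketch;
 * "gem_obj" is just an illustrative parameter name):
 *
 *	struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
 *	struct drm_i915_private *i915 = intel_bo_to_i915(obj);
 *
 * Because @base sits at offset 0 (as the BUILD_BUG_ON above asserts),
 * to_intel_bo(NULL) yields NULL.
 */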

#endif