| 1 | /* SPDX-License-Identifier: MIT */ |
| 2 | #ifndef _INTEL_RINGBUFFER_H_ |
| 3 | #define _INTEL_RINGBUFFER_H_ |
| 4 | |
| 5 | #include <asm/cacheflush.h> |
| 6 | #include <drm/drm_util.h> |
| 7 | #include <drm/drm_cache.h> |
| 8 | |
| 9 | #include <linux/hashtable.h> |
| 10 | #include <linux/irq_work.h> |
| 11 | #include <linux/random.h> |
| 12 | #include <linux/seqlock.h> |
| 13 | |
| 14 | #include "i915_pmu.h" |
| 15 | #include "i915_request.h" |
| 16 | #include "i915_selftest.h" |
| 17 | #include "intel_engine_types.h" |
| 18 | #include "intel_gt_types.h" |
| 19 | #include "intel_timeline.h" |
| 20 | #include "intel_workarounds.h" |
| 21 | |
| 22 | struct drm_printer; |
| 23 | struct intel_context; |
| 24 | struct intel_gt; |
| 25 | struct lock_class_key; |
| 26 | |
/*
 * Early gen2 devices have a cacheline of just 32 bytes, so 64 is overkill
 * for them, but a single value keeps the logic simple. The real purpose of
 * this macro is to give some indication of where the magic values used in
 * the various workarounds come from.
 */
#define CACHELINE_BYTES		64
#define CACHELINE_DWORDS	(CACHELINE_BYTES / sizeof(u32))
| 34 | |
/*
 * Emit a GEM_TRACE() message prefixed with the device name and the engine
 * name. The engine expression is latched into a local so that it is
 * evaluated only once; __maybe_unused keeps the local quiet if GEM_TRACE()
 * does not use its arguments.
 */
#define ENGINE_TRACE(e, fmt, ...)					\
do {									\
	const struct intel_engine_cs *engine__ __maybe_unused = (e);	\
									\
	GEM_TRACE("%s %s: " fmt,					\
		  dev_name(engine__->i915->drm.dev), engine__->name,	\
		  ##__VA_ARGS__);					\
} while (0)
| 41 | |
/*
 * Per-engine register accessors.
 *
 * Register definitions used with the macros below must accept the engine's
 * mmio base as a parameter, e.g.:
 *
 *	REG_FOO(base) _MMIO((base) + <relative offset>)
 *	ENGINE_READ(engine, REG_FOO);
 *
 * Register arrays additionally take an index and are accessed as follows:
 *
 *	REG_BAR(base, i) _MMIO((base) + <relative offset> + (i) * <shift>)
 *	ENGINE_READ_IDX(engine, REG_BAR, i)
 */

/* Call intel_uncore_<op>() on the engine's uncore with the given arguments. */
#define __ENGINE_REG_OP(op__, engine__, ...) \
	intel_uncore_##op__((engine__)->uncore, __VA_ARGS__)

/* Reads: instantiate the register with the engine's mmio base. */
#define __ENGINE_READ_OP(op__, engine__, reg__) \
	__ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base))

#define ENGINE_READ16(...)		__ENGINE_READ_OP(read16, __VA_ARGS__)
#define ENGINE_READ(...)		__ENGINE_READ_OP(read, __VA_ARGS__)
#define ENGINE_READ_FW(...)		__ENGINE_READ_OP(read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ(...)	__ENGINE_READ_OP(posting_read_fw, __VA_ARGS__)
#define ENGINE_POSTING_READ16(...)	__ENGINE_READ_OP(posting_read16, __VA_ARGS__)

/* 64-bit read assembled from separate lower/upper 32-bit registers. */
#define ENGINE_READ64(engine__, lower_reg__, upper_reg__) \
	__ENGINE_REG_OP(read64_2x32, (engine__), \
			lower_reg__((engine__)->mmio_base), \
			upper_reg__((engine__)->mmio_base))

/* Read element idx__ of a register array. */
#define ENGINE_READ_IDX(engine__, reg__, idx__) \
	__ENGINE_REG_OP(read, (engine__), reg__((engine__)->mmio_base, (idx__)))

/* Writes: same register instantiation as the reads, plus the value. */
#define __ENGINE_WRITE_OP(op__, engine__, reg__, val__) \
	__ENGINE_REG_OP(op__, (engine__), reg__((engine__)->mmio_base), (val__))

#define ENGINE_WRITE16(...)	__ENGINE_WRITE_OP(write16, __VA_ARGS__)
#define ENGINE_WRITE(...)	__ENGINE_WRITE_OP(write, __VA_ARGS__)
#define ENGINE_WRITE_FW(...)	__ENGINE_WRITE_OP(write_fw, __VA_ARGS__)
| 81 | |
/* Non-zero when the bit for engine @id is set in @engine_mask. */
#define __HAS_ENGINE(engine_mask, id)	((engine_mask) & BIT(id))
#define HAS_ENGINE(gt, id)		__HAS_ENGINE((gt)->info.engine_mask, id)

/*
 * Extract @count consecutive bits of @mask starting at bit @first, shifted
 * down so that the first instance ends up in bit 0. @first and @count are
 * latched into locals and therefore evaluated exactly once.
 */
#define __ENGINE_INSTANCES_MASK(mask, first, count)			\
({									\
	unsigned int first__ = (first);					\
	unsigned int count__ = (count);					\
									\
	((mask) & GENMASK(first__ + count__ - 1, first__)) >> first__;	\
})

#define ENGINE_INSTANCES_MASK(gt, first, count) \
	__ENGINE_INSTANCES_MASK((gt)->info.engine_mask, first, count)

/* Per-class instance masks, taken from the gt's engine mask. */
#define RCS_MASK(gt)	ENGINE_INSTANCES_MASK(gt, RCS0, I915_MAX_RCS)
#define BCS_MASK(gt)	ENGINE_INSTANCES_MASK(gt, BCS0, I915_MAX_BCS)
#define VDBOX_MASK(gt)	ENGINE_INSTANCES_MASK(gt, VCS0, I915_MAX_VCS)
#define VEBOX_MASK(gt)	ENGINE_INSTANCES_MASK(gt, VECS0, I915_MAX_VECS)
#define CCS_MASK(gt)	ENGINE_INSTANCES_MASK(gt, CCS0, I915_MAX_CCS)
| 104 | |
/*
 * Accessors for the per-engine ring fault register.
 *
 * Each macro latches @engine__ into a local first, so an argument with side
 * effects is evaluated exactly once even though the engine is needed both
 * for its uncore and for RING_FAULT_REG().
 */

/* Read the engine's ring fault register and yield its value. */
#define GEN6_RING_FAULT_REG_READ(engine__)				\
({									\
	const struct intel_engine_cs *fault_engine__ = (engine__);	\
									\
	intel_uncore_read(fault_engine__->uncore,			\
			  RING_FAULT_REG(fault_engine__));		\
})

/* Posting read of the engine's ring fault register. */
#define GEN6_RING_FAULT_REG_POSTING_READ(engine__)			\
({									\
	const struct intel_engine_cs *fault_engine__ = (engine__);	\
									\
	intel_uncore_posting_read(fault_engine__->uncore,		\
				  RING_FAULT_REG(fault_engine__));	\
})

/*
 * Read-modify-write of the engine's ring fault register: the bits in
 * @clear__ are cleared, then the bits in @set__ are set, and the result is
 * written back.
 */
#define GEN6_RING_FAULT_REG_RMW(engine__, clear__, set__)		\
({									\
	const struct intel_engine_cs *fault_engine__ = (engine__);	\
	u32 fault_val__;						\
									\
	fault_val__ = intel_uncore_read(fault_engine__->uncore,		\
					RING_FAULT_REG(fault_engine__)); \
	fault_val__ &= ~(clear__);					\
	fault_val__ |= (set__);						\
	intel_uncore_write(fault_engine__->uncore,			\
			   RING_FAULT_REG(fault_engine__),		\
			   fault_val__);				\
})
| 122 | |
| 123 | /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to |
| 124 | * do the writes, and that must have qw aligned offsets, simply pretend it's 8b. |
| 125 | */ |
| 126 | |
| 127 | static inline unsigned int |
| 128 | execlists_num_ports(const struct intel_engine_execlists * const execlists) |
| 129 | { |
| 130 | return execlists->port_mask + 1; |
| 131 | } |
| 132 | |
| 133 | static inline struct i915_request * |
| 134 | execlists_active(const struct intel_engine_execlists *execlists) |
| 135 | { |
| 136 | struct i915_request * const *cur, * const *old, *active; |
| 137 | |
| 138 | cur = READ_ONCE(execlists->active); |
| 139 | smp_rmb(); /* pairs with overwrite protection in process_csb() */ |
| 140 | do { |
| 141 | old = cur; |
| 142 | |
| 143 | active = READ_ONCE(*cur); |
| 144 | cur = READ_ONCE(execlists->active); |
| 145 | |
| 146 | smp_rmb(); /* and complete the seqlock retry */ |
| 147 | } while (unlikely(cur != old)); |
| 148 | |
| 149 | return active; |
| 150 | } |
| 151 | |
| 152 | static inline u32 |
| 153 | intel_read_status_page(const struct intel_engine_cs *engine, int reg) |
| 154 | { |
| 155 | /* Ensure that the compiler doesn't optimize away the load. */ |
| 156 | return READ_ONCE(engine->status_page.addr[reg]); |
| 157 | } |
| 158 | |
| 159 | static inline void |
| 160 | intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value) |
| 161 | { |
| 162 | /* Writing into the status page should be done sparingly. Since |
| 163 | * we do when we are uncertain of the device state, we take a bit |
| 164 | * of extra paranoia to try and ensure that the HWS takes the value |
| 165 | * we give and that it doesn't end up trapped inside the CPU! |
| 166 | */ |
| 167 | drm_clflush_virt_range(addr: &engine->status_page.addr[reg], length: sizeof(value)); |
| 168 | WRITE_ONCE(engine->status_page.addr[reg], value); |
| 169 | drm_clflush_virt_range(addr: &engine->status_page.addr[reg], length: sizeof(value)); |
| 170 | } |
| 171 | |
/*
 * Layout of the hardware status page.
 *
 * A dword read from the status page has been written there from the command
 * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or
 * MI_STORE_DATA_IMM.
 *
 * The following dwords have a reserved meaning:
 *	0x00: ISR copy, updated when an ISR bit not set in the HWSTAM changes.
 *	0x04: ring 0 head pointer
 *	0x05: ring 1 head pointer (915-class)
 *	0x06: ring 2 head pointer (915-class)
 *	0x10-0x1b: Context status DWords (GM45)
 *	0x1f: Last written status offset. (GM45)
 *	0x20-0x2f: Reserved (Gen6+)
 *
 * The area from dword 0x30 to 0x3ff is available for driver usage.
 *
 * Unless noted otherwise, the plain names below are dword indices and the
 * matching *_ADDR names are the same slot expressed as a byte offset.
 */
#define I915_GEM_HWS_PREEMPT		0x32
#define I915_GEM_HWS_PREEMPT_ADDR	(I915_GEM_HWS_PREEMPT * sizeof(u32))
#define I915_GEM_HWS_SEQNO		0x40
#define I915_GEM_HWS_SEQNO_ADDR		(I915_GEM_HWS_SEQNO * sizeof(u32))
/* Note: already a byte offset, there is no separate index define. */
#define I915_GEM_HWS_MIGRATE		(0x42 * sizeof(u32))
#define I915_GEM_HWS_GGTT_BIND		0x46
#define I915_GEM_HWS_GGTT_BIND_ADDR	(I915_GEM_HWS_GGTT_BIND * sizeof(u32))
#define I915_GEM_HWS_PXP		0x60
#define I915_GEM_HWS_PXP_ADDR		(I915_GEM_HWS_PXP * sizeof(u32))
#define I915_GEM_HWS_GSC		0x62
#define I915_GEM_HWS_GSC_ADDR		(I915_GEM_HWS_GSC * sizeof(u32))
#define I915_GEM_HWS_SCRATCH		0x80

#define I915_HWS_CSB_BUF0_INDEX		0x10
#define I915_HWS_CSB_WRITE_INDEX	0x1f
#define ICL_HWS_CSB_WRITE_INDEX		0x2f
/* The CSB write index slot is 0x2f for graphics version >= 11, else 0x1f. */
#define INTEL_HWS_CSB_WRITE_INDEX(__i915) \
	(GRAPHICS_VER(__i915) >= 11 ? ICL_HWS_CSB_WRITE_INDEX : I915_HWS_CSB_WRITE_INDEX)
| 206 | |
| 207 | void intel_engine_stop(struct intel_engine_cs *engine); |
| 208 | void intel_engine_cleanup(struct intel_engine_cs *engine); |
| 209 | |
| 210 | int intel_engines_init_mmio(struct intel_gt *gt); |
| 211 | int intel_engines_init(struct intel_gt *gt); |
| 212 | |
| 213 | void intel_engine_free_request_pool(struct intel_engine_cs *engine); |
| 214 | |
| 215 | void intel_engines_release(struct intel_gt *gt); |
| 216 | void intel_engines_free(struct intel_gt *gt); |
| 217 | |
| 218 | int intel_engine_init_common(struct intel_engine_cs *engine); |
| 219 | void intel_engine_cleanup_common(struct intel_engine_cs *engine); |
| 220 | |
| 221 | int intel_engine_resume(struct intel_engine_cs *engine); |
| 222 | |
| 223 | int intel_ring_submission_setup(struct intel_engine_cs *engine); |
| 224 | |
| 225 | int intel_engine_stop_cs(struct intel_engine_cs *engine); |
| 226 | void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine); |
| 227 | |
| 228 | void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine); |
| 229 | |
| 230 | void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask); |
| 231 | |
| 232 | u64 intel_engine_get_active_head(const struct intel_engine_cs *engine); |
| 233 | u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine); |
| 234 | |
| 235 | void intel_engine_get_instdone(const struct intel_engine_cs *engine, |
| 236 | struct intel_instdone *instdone); |
| 237 | |
| 238 | void intel_engine_init_execlists(struct intel_engine_cs *engine); |
| 239 | |
| 240 | bool intel_engine_irq_enable(struct intel_engine_cs *engine); |
| 241 | void intel_engine_irq_disable(struct intel_engine_cs *engine); |
| 242 | |
| 243 | static inline void __intel_engine_reset(struct intel_engine_cs *engine, |
| 244 | bool stalled) |
| 245 | { |
| 246 | if (engine->reset.rewind) |
| 247 | engine->reset.rewind(engine, stalled); |
| 248 | engine->serial++; /* contexts lost */ |
| 249 | } |
| 250 | |
| 251 | bool intel_engines_are_idle(struct intel_gt *gt); |
| 252 | bool intel_engine_is_idle(struct intel_engine_cs *engine); |
| 253 | |
| 254 | void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync); |
| 255 | static inline void intel_engine_flush_submission(struct intel_engine_cs *engine) |
| 256 | { |
| 257 | __intel_engine_flush_submission(engine, sync: true); |
| 258 | } |
| 259 | |
| 260 | void intel_engines_reset_default_submission(struct intel_gt *gt); |
| 261 | |
| 262 | bool intel_engine_can_store_dword(struct intel_engine_cs *engine); |
| 263 | |
| 264 | __printf(3, 4) |
| 265 | void intel_engine_dump(struct intel_engine_cs *engine, |
| 266 | struct drm_printer *m, |
| 267 | const char *, ...); |
| 268 | void intel_engine_dump_active_requests(struct list_head *requests, |
| 269 | struct i915_request *hung_rq, |
| 270 | struct drm_printer *m); |
| 271 | |
| 272 | ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, |
| 273 | ktime_t *now); |
| 274 | |
| 275 | void intel_engine_get_hung_entity(struct intel_engine_cs *engine, |
| 276 | struct intel_context **ce, struct i915_request **rq); |
| 277 | |
| 278 | u32 intel_engine_context_size(struct intel_gt *gt, u8 class); |
| 279 | struct intel_context * |
| 280 | intel_engine_create_pinned_context(struct intel_engine_cs *engine, |
| 281 | struct i915_address_space *vm, |
| 282 | unsigned int ring_size, |
| 283 | unsigned int hwsp, |
| 284 | struct lock_class_key *key, |
| 285 | const char *name); |
| 286 | |
| 287 | void intel_engine_destroy_pinned_context(struct intel_context *ce); |
| 288 | |
| 289 | void xehp_enable_ccs_engines(struct intel_engine_cs *engine); |
| 290 | |
/* Engine kind identifiers. */
#define ENGINE_PHYSICAL		0
#define ENGINE_MOCK		1
#define ENGINE_VIRTUAL		2
| 294 | |
| 295 | static inline bool intel_engine_uses_guc(const struct intel_engine_cs *engine) |
| 296 | { |
| 297 | return engine->gt->submission_method >= INTEL_SUBMISSION_GUC; |
| 298 | } |
| 299 | |
| 300 | static inline bool |
| 301 | intel_engine_has_preempt_reset(const struct intel_engine_cs *engine) |
| 302 | { |
| 303 | if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) |
| 304 | return false; |
| 305 | |
| 306 | return intel_engine_has_preemption(engine); |
| 307 | } |
| 308 | |
/* Flag bit accepted in the @flags argument of intel_engine_create_virtual(). */
#define FORCE_VIRTUAL	BIT(0)

struct intel_context *
intel_engine_create_virtual(struct intel_engine_cs **siblings,
			    unsigned int count,
			    unsigned long flags);
| 313 | |
| 314 | static inline struct intel_context * |
| 315 | intel_engine_create_parallel(struct intel_engine_cs **engines, |
| 316 | unsigned int num_engines, |
| 317 | unsigned int width) |
| 318 | { |
| 319 | GEM_BUG_ON(!engines[0]->cops->create_parallel); |
| 320 | return engines[0]->cops->create_parallel(engines, num_engines, width); |
| 321 | } |
| 322 | |
| 323 | static inline bool |
| 324 | intel_virtual_engine_has_heartbeat(const struct intel_engine_cs *engine) |
| 325 | { |
| 326 | /* |
| 327 | * For non-GuC submission we expect the back-end to look at the |
| 328 | * heartbeat status of the actual physical engine that the work |
| 329 | * has been (or is being) scheduled on, so we should only reach |
| 330 | * here with GuC submission enabled. |
| 331 | */ |
| 332 | GEM_BUG_ON(!intel_engine_uses_guc(engine)); |
| 333 | |
| 334 | return intel_guc_virtual_engine_has_heartbeat(ve: engine); |
| 335 | } |
| 336 | |
| 337 | static inline bool |
| 338 | intel_engine_has_heartbeat(const struct intel_engine_cs *engine) |
| 339 | { |
| 340 | if (!CONFIG_DRM_I915_HEARTBEAT_INTERVAL) |
| 341 | return false; |
| 342 | |
| 343 | if (intel_engine_is_virtual(engine)) |
| 344 | return intel_virtual_engine_has_heartbeat(engine); |
| 345 | else |
| 346 | return READ_ONCE(engine->props.heartbeat_interval_ms); |
| 347 | } |
| 348 | |
| 349 | static inline struct intel_engine_cs * |
| 350 | intel_engine_get_sibling(struct intel_engine_cs *engine, unsigned int sibling) |
| 351 | { |
| 352 | GEM_BUG_ON(!intel_engine_is_virtual(engine)); |
| 353 | return engine->cops->get_sibling(engine, sibling); |
| 354 | } |
| 355 | |
| 356 | static inline void |
| 357 | intel_engine_set_hung_context(struct intel_engine_cs *engine, |
| 358 | struct intel_context *ce) |
| 359 | { |
| 360 | engine->hung_ce = ce; |
| 361 | } |
| 362 | |
| 363 | static inline void |
| 364 | intel_engine_clear_hung_context(struct intel_engine_cs *engine) |
| 365 | { |
| 366 | intel_engine_set_hung_context(engine, NULL); |
| 367 | } |
| 368 | |
| 369 | static inline struct intel_context * |
| 370 | intel_engine_get_hung_context(struct intel_engine_cs *engine) |
| 371 | { |
| 372 | return engine->hung_ce; |
| 373 | } |
| 374 | |
| 375 | u64 intel_clamp_heartbeat_interval_ms(struct intel_engine_cs *engine, u64 value); |
| 376 | u64 intel_clamp_max_busywait_duration_ns(struct intel_engine_cs *engine, u64 value); |
| 377 | u64 intel_clamp_preempt_timeout_ms(struct intel_engine_cs *engine, u64 value); |
| 378 | u64 intel_clamp_stop_timeout_ms(struct intel_engine_cs *engine, u64 value); |
| 379 | u64 intel_clamp_timeslice_duration_ms(struct intel_engine_cs *engine, u64 value); |
| 380 | |
/* Map an rb_node to its containing engine via the uabi_node member. */
#define rb_to_uabi_engine(rb) \
	rb_entry_safe(rb, struct intel_engine_cs, uabi_node)

/*
 * Iterate over every engine in i915__->uabi_engines, in rbtree order. The
 * loop ends when rb_to_uabi_engine() yields NULL.
 */
#define for_each_uabi_engine(engine__, i915__)				\
	for ((engine__) = rb_to_uabi_engine(rb_first(&(i915__)->uabi_engines));\
	     (engine__);						\
	     (engine__) = rb_to_uabi_engine(rb_next(&(engine__)->uabi_node)))
| 388 | |
| 389 | #endif /* _INTEL_RINGBUFFER_H_ */ |
| 390 | |