| 1 | /* SPDX-License-Identifier: MIT */ |
| 2 | /* |
| 3 | * Copyright © 2019 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #ifndef _I915_PERF_TYPES_H_ |
| 7 | #define _I915_PERF_TYPES_H_ |
| 8 | |
| 9 | #include <linux/atomic.h> |
| 10 | #include <linux/device.h> |
| 11 | #include <linux/hrtimer.h> |
| 12 | #include <linux/llist.h> |
| 13 | #include <linux/poll.h> |
| 14 | #include <linux/sysfs.h> |
| 15 | #include <linux/types.h> |
| 16 | #include <linux/uuid.h> |
| 17 | #include <linux/wait.h> |
| 18 | #include <uapi/drm/i915_drm.h> |
| 19 | |
| 20 | #include "gt/intel_engine_types.h" |
| 21 | #include "gt/intel_sseu.h" |
| 22 | #include "i915_reg_defs.h" |
| 23 | #include "intel_uncore.h" |
| 24 | #include "intel_wakeref.h" |
| 25 | |
| 26 | struct drm_i915_private; |
| 27 | struct file; |
| 28 | struct i915_active; |
| 29 | struct i915_gem_context; |
| 30 | struct i915_perf; |
| 31 | struct i915_vma; |
| 32 | struct intel_context; |
| 33 | struct intel_engine_cs; |
| 34 | |
| 35 | enum { |
| 36 | PERF_GROUP_OAG = 0, |
| 37 | PERF_GROUP_OAM_SAMEDIA_0 = 0, |
| 38 | |
| 39 | PERF_GROUP_MAX, |
| 40 | PERF_GROUP_INVALID = U32_MAX, |
| 41 | }; |
| 42 | |
| 43 | enum { |
| 44 | HDR_32_BIT = 0, |
| 45 | HDR_64_BIT, |
| 46 | }; |
| 47 | |
| 48 | struct i915_perf_regs { |
| 49 | u32 base; |
| 50 | i915_reg_t oa_head_ptr; |
| 51 | i915_reg_t oa_tail_ptr; |
| 52 | i915_reg_t oa_buffer; |
| 53 | i915_reg_t oa_ctx_ctrl; |
| 54 | i915_reg_t oa_ctrl; |
| 55 | i915_reg_t oa_debug; |
| 56 | i915_reg_t oa_status; |
| 57 | u32 oa_ctrl_counter_format_shift; |
| 58 | }; |
| 59 | |
| 60 | enum oa_type { |
| 61 | TYPE_OAG, |
| 62 | TYPE_OAM, |
| 63 | }; |
| 64 | |
| 65 | struct i915_oa_format { |
| 66 | u32 format; |
| 67 | int size; |
| 68 | int type; |
| 69 | enum report_header ; |
| 70 | }; |
| 71 | |
| 72 | struct i915_oa_reg { |
| 73 | i915_reg_t addr; |
| 74 | u32 value; |
| 75 | }; |
| 76 | |
| 77 | struct i915_oa_config { |
| 78 | struct i915_perf *perf; |
| 79 | |
| 80 | char uuid[UUID_STRING_LEN + 1]; |
| 81 | int id; |
| 82 | |
| 83 | const struct i915_oa_reg *mux_regs; |
| 84 | u32 mux_regs_len; |
| 85 | const struct i915_oa_reg *b_counter_regs; |
| 86 | u32 b_counter_regs_len; |
| 87 | const struct i915_oa_reg *flex_regs; |
| 88 | u32 flex_regs_len; |
| 89 | |
| 90 | struct attribute_group sysfs_metric; |
| 91 | struct attribute *attrs[2]; |
| 92 | struct kobj_attribute sysfs_metric_id; |
| 93 | |
| 94 | struct kref ref; |
| 95 | struct rcu_head rcu; |
| 96 | }; |
| 97 | |
| 98 | struct i915_perf_stream; |
| 99 | |
| 100 | /** |
| 101 | * struct i915_perf_stream_ops - the OPs to support a specific stream type |
| 102 | */ |
| 103 | struct i915_perf_stream_ops { |
| 104 | /** |
| 105 | * @enable: Enables the collection of HW samples, either in response to |
| 106 | * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened |
| 107 | * without `I915_PERF_FLAG_DISABLED`. |
| 108 | */ |
| 109 | void (*enable)(struct i915_perf_stream *stream); |
| 110 | |
| 111 | /** |
| 112 | * @disable: Disables the collection of HW samples, either in response |
| 113 | * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying |
| 114 | * the stream. |
| 115 | */ |
| 116 | void (*disable)(struct i915_perf_stream *stream); |
| 117 | |
| 118 | /** |
| 119 | * @poll_wait: Call poll_wait, passing a wait queue that will be woken |
| 120 | * once there is something ready to read() for the stream |
| 121 | */ |
| 122 | void (*poll_wait)(struct i915_perf_stream *stream, |
| 123 | struct file *file, |
| 124 | poll_table *wait); |
| 125 | |
| 126 | /** |
| 127 | * @wait_unlocked: For handling a blocking read, wait until there is |
| 128 | * something to ready to read() for the stream. E.g. wait on the same |
| 129 | * wait queue that would be passed to poll_wait(). |
| 130 | */ |
| 131 | int (*wait_unlocked)(struct i915_perf_stream *stream); |
| 132 | |
| 133 | /** |
| 134 | * @read: Copy buffered metrics as records to userspace |
| 135 | * **buf**: the userspace, destination buffer |
| 136 | * **count**: the number of bytes to copy, requested by userspace |
| 137 | * **offset**: zero at the start of the read, updated as the read |
| 138 | * proceeds, it represents how many bytes have been copied so far and |
| 139 | * the buffer offset for copying the next record. |
| 140 | * |
| 141 | * Copy as many buffered i915 perf samples and records for this stream |
| 142 | * to userspace as will fit in the given buffer. |
| 143 | * |
| 144 | * Only write complete records; returning -%ENOSPC if there isn't room |
| 145 | * for a complete record. |
| 146 | * |
| 147 | * Return any error condition that results in a short read such as |
| 148 | * -%ENOSPC or -%EFAULT, even though these may be squashed before |
| 149 | * returning to userspace. |
| 150 | */ |
| 151 | int (*read)(struct i915_perf_stream *stream, |
| 152 | char __user *buf, |
| 153 | size_t count, |
| 154 | size_t *offset); |
| 155 | |
| 156 | /** |
| 157 | * @destroy: Cleanup any stream specific resources. |
| 158 | * |
| 159 | * The stream will always be disabled before this is called. |
| 160 | */ |
| 161 | void (*destroy)(struct i915_perf_stream *stream); |
| 162 | }; |
| 163 | |
| 164 | /** |
| 165 | * struct i915_perf_stream - state for a single open stream FD |
| 166 | */ |
| 167 | struct i915_perf_stream { |
| 168 | /** |
| 169 | * @perf: i915_perf backpointer |
| 170 | */ |
| 171 | struct i915_perf *perf; |
| 172 | |
| 173 | /** |
| 174 | * @uncore: mmio access path |
| 175 | */ |
| 176 | struct intel_uncore *uncore; |
| 177 | |
| 178 | /** |
| 179 | * @engine: Engine associated with this performance stream. |
| 180 | */ |
| 181 | struct intel_engine_cs *engine; |
| 182 | |
| 183 | /** |
| 184 | * @lock: Lock associated with operations on stream |
| 185 | */ |
| 186 | struct mutex lock; |
| 187 | |
| 188 | /** |
| 189 | * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*` |
| 190 | * properties given when opening a stream, representing the contents |
| 191 | * of a single sample as read() by userspace. |
| 192 | */ |
| 193 | u32 sample_flags; |
| 194 | |
| 195 | /** |
| 196 | * @sample_size: Considering the configured contents of a sample |
| 197 | * combined with the required header size, this is the total size |
| 198 | * of a single sample record. |
| 199 | */ |
| 200 | int sample_size; |
| 201 | |
| 202 | /** |
| 203 | * @ctx: %NULL if measuring system-wide across all contexts or a |
| 204 | * specific context that is being monitored. |
| 205 | */ |
| 206 | struct i915_gem_context *ctx; |
| 207 | |
| 208 | /** |
| 209 | * @enabled: Whether the stream is currently enabled, considering |
| 210 | * whether the stream was opened in a disabled state and based |
| 211 | * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls. |
| 212 | */ |
| 213 | bool enabled; |
| 214 | |
| 215 | /** |
| 216 | * @hold_preemption: Whether preemption is put on hold for command |
| 217 | * submissions done on the @ctx. This is useful for some drivers that |
| 218 | * cannot easily post process the OA buffer context to subtract delta |
| 219 | * of performance counters not associated with @ctx. |
| 220 | */ |
| 221 | bool hold_preemption; |
| 222 | |
| 223 | /** |
| 224 | * @ops: The callbacks providing the implementation of this specific |
| 225 | * type of configured stream. |
| 226 | */ |
| 227 | const struct i915_perf_stream_ops *ops; |
| 228 | |
| 229 | /** |
| 230 | * @oa_config: The OA configuration used by the stream. |
| 231 | */ |
| 232 | struct i915_oa_config *oa_config; |
| 233 | |
| 234 | /** |
| 235 | * @oa_config_bos: A list of struct i915_oa_config_bo allocated lazily |
| 236 | * each time @oa_config changes. |
| 237 | */ |
| 238 | struct llist_head oa_config_bos; |
| 239 | |
| 240 | /** |
| 241 | * @pinned_ctx: The OA context specific information. |
| 242 | */ |
| 243 | struct intel_context *pinned_ctx; |
| 244 | |
| 245 | /** |
| 246 | * @specific_ctx_id: The id of the specific context. |
| 247 | */ |
| 248 | u32 specific_ctx_id; |
| 249 | |
| 250 | /** |
| 251 | * @specific_ctx_id_mask: The mask used to masking specific_ctx_id bits. |
| 252 | */ |
| 253 | u32 specific_ctx_id_mask; |
| 254 | |
| 255 | /** |
| 256 | * @poll_check_timer: High resolution timer that will periodically |
| 257 | * check for data in the circular OA buffer for notifying userspace |
| 258 | * (e.g. during a read() or poll()). |
| 259 | */ |
| 260 | struct hrtimer poll_check_timer; |
| 261 | |
| 262 | /** |
| 263 | * @poll_wq: The wait queue that hrtimer callback wakes when it |
| 264 | * sees data ready to read in the circular OA buffer. |
| 265 | */ |
| 266 | wait_queue_head_t poll_wq; |
| 267 | |
| 268 | /** |
| 269 | * @pollin: Whether there is data available to read. |
| 270 | */ |
| 271 | bool pollin; |
| 272 | |
| 273 | /** |
| 274 | * @periodic: Whether periodic sampling is currently enabled. |
| 275 | */ |
| 276 | bool periodic; |
| 277 | |
| 278 | /** |
| 279 | * @period_exponent: The OA unit sampling frequency is derived from this. |
| 280 | */ |
| 281 | int period_exponent; |
| 282 | |
| 283 | /** |
| 284 | * @oa_buffer: State of the OA buffer. |
| 285 | */ |
| 286 | struct { |
| 287 | const struct i915_oa_format *format; |
| 288 | struct i915_vma *vma; |
| 289 | u8 *vaddr; |
| 290 | u32 last_ctx_id; |
| 291 | |
| 292 | /** |
| 293 | * @oa_buffer.ptr_lock: Locks reads and writes to all |
| 294 | * head/tail state |
| 295 | * |
| 296 | * Consider: the head and tail pointer state needs to be read |
| 297 | * consistently from a hrtimer callback (atomic context) and |
| 298 | * read() fop (user context) with tail pointer updates happening |
| 299 | * in atomic context and head updates in user context and the |
| 300 | * (unlikely) possibility of read() errors needing to reset all |
| 301 | * head/tail state. |
| 302 | * |
| 303 | * Note: Contention/performance aren't currently a significant |
| 304 | * concern here considering the relatively low frequency of |
| 305 | * hrtimer callbacks (5ms period) and that reads typically only |
| 306 | * happen in response to a hrtimer event and likely complete |
| 307 | * before the next callback. |
| 308 | * |
| 309 | * Note: This lock is not held *while* reading and copying data |
| 310 | * to userspace so the value of head observed in htrimer |
| 311 | * callbacks won't represent any partial consumption of data. |
| 312 | */ |
| 313 | spinlock_t ptr_lock; |
| 314 | |
| 315 | /** |
| 316 | * @oa_buffer.head: Although we can always read back |
| 317 | * the head pointer register, |
| 318 | * we prefer to avoid trusting the HW state, just to avoid any |
| 319 | * risk that some hardware condition could * somehow bump the |
| 320 | * head pointer unpredictably and cause us to forward the wrong |
| 321 | * OA buffer data to userspace. |
| 322 | */ |
| 323 | u32 head; |
| 324 | |
| 325 | /** |
| 326 | * @oa_buffer.tail: The last verified tail that can be |
| 327 | * read by userspace. |
| 328 | */ |
| 329 | u32 tail; |
| 330 | } oa_buffer; |
| 331 | |
| 332 | /** |
| 333 | * @noa_wait: A batch buffer doing a wait on the GPU for the NOA logic to be |
| 334 | * reprogrammed. |
| 335 | */ |
| 336 | struct i915_vma *noa_wait; |
| 337 | |
| 338 | /** |
| 339 | * @poll_oa_period: The period in nanoseconds at which the OA |
| 340 | * buffer should be checked for available data. |
| 341 | */ |
| 342 | u64 poll_oa_period; |
| 343 | }; |
| 344 | |
| 345 | /** |
| 346 | * struct i915_oa_ops - Gen specific implementation of an OA unit stream |
| 347 | */ |
| 348 | struct i915_oa_ops { |
| 349 | /** |
| 350 | * @is_valid_b_counter_reg: Validates register's address for |
| 351 | * programming boolean counters for a particular platform. |
| 352 | */ |
| 353 | bool (*is_valid_b_counter_reg)(struct i915_perf *perf, u32 addr); |
| 354 | |
| 355 | /** |
| 356 | * @is_valid_mux_reg: Validates register's address for programming mux |
| 357 | * for a particular platform. |
| 358 | */ |
| 359 | bool (*is_valid_mux_reg)(struct i915_perf *perf, u32 addr); |
| 360 | |
| 361 | /** |
| 362 | * @is_valid_flex_reg: Validates register's address for programming |
| 363 | * flex EU filtering for a particular platform. |
| 364 | */ |
| 365 | bool (*is_valid_flex_reg)(struct i915_perf *perf, u32 addr); |
| 366 | |
| 367 | /** |
| 368 | * @enable_metric_set: Selects and applies any MUX configuration to set |
| 369 | * up the Boolean and Custom (B/C) counters that are part of the |
| 370 | * counter reports being sampled. May apply system constraints such as |
| 371 | * disabling EU clock gating as required. |
| 372 | */ |
| 373 | int (*enable_metric_set)(struct i915_perf_stream *stream, |
| 374 | struct i915_active *active); |
| 375 | |
| 376 | /** |
| 377 | * @disable_metric_set: Remove system constraints associated with using |
| 378 | * the OA unit. |
| 379 | */ |
| 380 | void (*disable_metric_set)(struct i915_perf_stream *stream); |
| 381 | |
| 382 | /** |
| 383 | * @oa_enable: Enable periodic sampling |
| 384 | */ |
| 385 | void (*oa_enable)(struct i915_perf_stream *stream); |
| 386 | |
| 387 | /** |
| 388 | * @oa_disable: Disable periodic sampling |
| 389 | */ |
| 390 | void (*oa_disable)(struct i915_perf_stream *stream); |
| 391 | |
| 392 | /** |
| 393 | * @read: Copy data from the circular OA buffer into a given userspace |
| 394 | * buffer. |
| 395 | */ |
| 396 | int (*read)(struct i915_perf_stream *stream, |
| 397 | char __user *buf, |
| 398 | size_t count, |
| 399 | size_t *offset); |
| 400 | |
| 401 | /** |
| 402 | * @oa_hw_tail_read: read the OA tail pointer register |
| 403 | * |
| 404 | * In particular this enables us to share all the fiddly code for |
| 405 | * handling the OA unit tail pointer race that affects multiple |
| 406 | * generations. |
| 407 | */ |
| 408 | u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream); |
| 409 | }; |
| 410 | |
| 411 | struct i915_perf_group { |
| 412 | /* |
| 413 | * @exclusive_stream: The stream currently using the OA unit. This is |
| 414 | * sometimes accessed outside a syscall associated to its file |
| 415 | * descriptor. |
| 416 | */ |
| 417 | struct i915_perf_stream *exclusive_stream; |
| 418 | |
| 419 | /* |
| 420 | * @num_engines: The number of engines using this OA unit. |
| 421 | */ |
| 422 | u32 num_engines; |
| 423 | |
| 424 | /* |
| 425 | * @regs: OA buffer register group for programming the OA unit. |
| 426 | */ |
| 427 | struct i915_perf_regs regs; |
| 428 | |
| 429 | /* |
| 430 | * @type: Type of OA unit - OAM, OAG etc. |
| 431 | */ |
| 432 | enum oa_type type; |
| 433 | }; |
| 434 | |
| 435 | struct i915_perf_gt { |
| 436 | /* |
| 437 | * Lock associated with anything below within this structure. |
| 438 | */ |
| 439 | struct mutex lock; |
| 440 | |
| 441 | /** |
| 442 | * @sseu: sseu configuration selected to run while perf is active, |
| 443 | * applies to all contexts. |
| 444 | */ |
| 445 | struct intel_sseu sseu; |
| 446 | |
| 447 | /** |
| 448 | * @num_perf_groups: number of perf groups per gt. |
| 449 | */ |
| 450 | u32 num_perf_groups; |
| 451 | |
| 452 | /* |
| 453 | * @group: list of OA groups - one for each OA buffer. |
| 454 | */ |
| 455 | struct i915_perf_group *group; |
| 456 | }; |
| 457 | |
| 458 | struct i915_perf { |
| 459 | struct drm_i915_private *i915; |
| 460 | |
| 461 | struct kobject *metrics_kobj; |
| 462 | |
| 463 | /* |
| 464 | * Lock associated with adding/modifying/removing OA configs |
| 465 | * in perf->metrics_idr. |
| 466 | */ |
| 467 | struct mutex metrics_lock; |
| 468 | |
| 469 | /* |
| 470 | * List of dynamic configurations (struct i915_oa_config), you |
| 471 | * need to hold perf->metrics_lock to access it. |
| 472 | */ |
| 473 | struct idr metrics_idr; |
| 474 | |
| 475 | /** |
| 476 | * For rate limiting any notifications of spurious |
| 477 | * invalid OA reports |
| 478 | */ |
| 479 | struct ratelimit_state spurious_report_rs; |
| 480 | |
| 481 | /** |
| 482 | * For rate limiting any notifications of tail pointer |
| 483 | * race. |
| 484 | */ |
| 485 | struct ratelimit_state tail_pointer_race; |
| 486 | |
| 487 | u32 gen7_latched_oastatus1; |
| 488 | u32 ctx_oactxctrl_offset; |
| 489 | u32 ctx_flexeu0_offset; |
| 490 | |
| 491 | /** |
| 492 | * The RPT_ID/reason field for Gen8+ includes a bit |
| 493 | * to determine if the CTX ID in the report is valid |
| 494 | * but the specific bit differs between Gen 8 and 9 |
| 495 | */ |
| 496 | u32 gen8_valid_ctx_bit; |
| 497 | |
| 498 | struct i915_oa_ops ops; |
| 499 | const struct i915_oa_format *oa_formats; |
| 500 | |
| 501 | /** |
| 502 | * Use a format mask to store the supported formats |
| 503 | * for a platform. |
| 504 | */ |
| 505 | #define FORMAT_MASK_SIZE DIV_ROUND_UP(I915_OA_FORMAT_MAX - 1, BITS_PER_LONG) |
| 506 | unsigned long format_mask[FORMAT_MASK_SIZE]; |
| 507 | |
| 508 | atomic64_t noa_programming_delay; |
| 509 | }; |
| 510 | |
| 511 | #endif /* _I915_PERF_TYPES_H_ */ |
| 512 | |