| 1 | // SPDX-License-Identifier: MIT |
| 2 | /* |
| 3 | * Copyright © 2023-2024 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #include <linux/anon_inodes.h> |
| 7 | #include <linux/delay.h> |
| 8 | #include <linux/nospec.h> |
| 9 | #include <linux/poll.h> |
| 10 | |
| 11 | #include <drm/drm_drv.h> |
| 12 | #include <drm/drm_managed.h> |
| 13 | #include <drm/drm_syncobj.h> |
| 14 | #include <uapi/drm/xe_drm.h> |
| 15 | |
| 16 | #include <generated/xe_wa_oob.h> |
| 17 | |
| 18 | #include "abi/guc_actions_slpc_abi.h" |
| 19 | #include "instructions/xe_mi_commands.h" |
| 20 | #include "regs/xe_engine_regs.h" |
| 21 | #include "regs/xe_gt_regs.h" |
| 22 | #include "regs/xe_oa_regs.h" |
| 23 | #include "xe_assert.h" |
| 24 | #include "xe_bb.h" |
| 25 | #include "xe_bo.h" |
| 26 | #include "xe_device.h" |
| 27 | #include "xe_exec_queue.h" |
| 28 | #include "xe_force_wake.h" |
| 29 | #include "xe_gt.h" |
| 30 | #include "xe_gt_mcr.h" |
| 31 | #include "xe_gt_printk.h" |
| 32 | #include "xe_guc_pc.h" |
| 33 | #include "xe_macros.h" |
| 34 | #include "xe_mmio.h" |
| 35 | #include "xe_oa.h" |
| 36 | #include "xe_observation.h" |
| 37 | #include "xe_pm.h" |
| 38 | #include "xe_sched_job.h" |
| 39 | #include "xe_sriov.h" |
| 40 | #include "xe_sync.h" |
| 41 | #include "xe_wa.h" |
| 42 | |
/* Default OA buffer poll frequency, in Hz */
#define DEFAULT_POLL_FREQUENCY_HZ	200
/* Poll period in nanoseconds that corresponds to DEFAULT_POLL_FREQUENCY_HZ */
#define DEFAULT_POLL_PERIOD_NS		(NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
/* Sentinel OA unit value (all ones); NOTE(review): users are outside this part of the file */
#define XE_OA_UNIT_INVALID		U32_MAX
| 46 | |
/* OAM unit kinds; the enumerators are consecutive and start at 0 */
enum xe_oam_unit_type {
	XE_OAM_UNIT_SAG = 0,
	XE_OAM_UNIT_SCMI_0,
	XE_OAM_UNIT_SCMI_1,
};
| 52 | |
/*
 * Tells xe_oa_submit_bb() whether the stream's sync entries must be added
 * to the job as dependencies (ADD_DEPS) or not (NO_DEPS).
 */
enum xe_oa_submit_deps {
	XE_OA_SUBMIT_NO_DEPS = 0,
	XE_OA_SUBMIT_ADD_DEPS,
};
| 57 | |
/*
 * Origin of a user extension: stream open or stream (re)config.
 * NOTE(review): the consumers of this enum are outside this part of the file.
 */
enum xe_oa_user_extn_from {
	XE_OA_USER_EXTN_FROM_OPEN = 0,
	XE_OA_USER_EXTN_FROM_CONFIG,
};
| 62 | |
| 63 | struct xe_oa_reg { |
| 64 | struct xe_reg addr; |
| 65 | u32 value; |
| 66 | }; |
| 67 | |
| 68 | struct xe_oa_config { |
| 69 | struct xe_oa *oa; |
| 70 | |
| 71 | char uuid[UUID_STRING_LEN + 1]; |
| 72 | int id; |
| 73 | |
| 74 | const struct xe_oa_reg *regs; |
| 75 | u32 regs_len; |
| 76 | |
| 77 | struct attribute_group sysfs_metric; |
| 78 | struct attribute *attrs[2]; |
| 79 | struct kobj_attribute sysfs_metric_id; |
| 80 | |
| 81 | struct kref ref; |
| 82 | struct rcu_head rcu; |
| 83 | }; |
| 84 | |
| 85 | struct xe_oa_open_param { |
| 86 | struct xe_file *xef; |
| 87 | struct xe_oa_unit *oa_unit; |
| 88 | bool sample; |
| 89 | u32 metric_set; |
| 90 | enum xe_oa_format_name oa_format; |
| 91 | int period_exponent; |
| 92 | bool disabled; |
| 93 | int exec_queue_id; |
| 94 | int engine_instance; |
| 95 | struct xe_exec_queue *exec_q; |
| 96 | struct xe_hw_engine *hwe; |
| 97 | bool no_preempt; |
| 98 | struct drm_xe_sync __user *syncs_user; |
| 99 | int num_syncs; |
| 100 | struct xe_sync_entry *syncs; |
| 101 | size_t oa_buffer_size; |
| 102 | int wait_num_reports; |
| 103 | }; |
| 104 | |
| 105 | struct xe_oa_config_bo { |
| 106 | struct llist_node node; |
| 107 | |
| 108 | struct xe_oa_config *oa_config; |
| 109 | struct xe_bb *bb; |
| 110 | }; |
| 111 | |
| 112 | struct xe_oa_fence { |
| 113 | /* @base: dma fence base */ |
| 114 | struct dma_fence base; |
| 115 | /* @lock: lock for the fence */ |
| 116 | spinlock_t lock; |
| 117 | /* @work: work to signal @base */ |
| 118 | struct delayed_work work; |
| 119 | /* @cb: callback to schedule @work */ |
| 120 | struct dma_fence_cb cb; |
| 121 | }; |
| 122 | |
| 123 | #define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x |
| 124 | |
| 125 | static const struct xe_oa_format oa_formats[] = { |
| 126 | [XE_OA_FORMAT_C4_B8] = { 7, 64, DRM_FMT(OAG) }, |
| 127 | [XE_OA_FORMAT_A12] = { 0, 64, DRM_FMT(OAG) }, |
| 128 | [XE_OA_FORMAT_A12_B8_C8] = { 2, 128, DRM_FMT(OAG) }, |
| 129 | [XE_OA_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, |
| 130 | [XE_OAR_FORMAT_A32u40_A4u32_B8_C8] = { 5, 256, DRM_FMT(OAR) }, |
| 131 | [XE_OA_FORMAT_A24u40_A14u32_B8_C8] = { 5, 256, DRM_FMT(OAG) }, |
| 132 | [XE_OAC_FORMAT_A24u64_B8_C8] = { 1, 320, DRM_FMT(OAC), HDR_64_BIT }, |
| 133 | [XE_OAC_FORMAT_A22u32_R2u32_B8_C8] = { 2, 192, DRM_FMT(OAC), HDR_64_BIT }, |
| 134 | [XE_OAM_FORMAT_MPEC8u64_B8_C8] = { 1, 192, DRM_FMT(OAM_MPEC), HDR_64_BIT }, |
| 135 | [XE_OAM_FORMAT_MPEC8u32_B8_C8] = { 2, 128, DRM_FMT(OAM_MPEC), HDR_64_BIT }, |
| 136 | [XE_OA_FORMAT_PEC64u64] = { 1, 576, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, |
| 137 | [XE_OA_FORMAT_PEC64u64_B8_C8] = { 1, 640, DRM_FMT(PEC), HDR_64_BIT, 1, 1 }, |
| 138 | [XE_OA_FORMAT_PEC64u32] = { 1, 320, DRM_FMT(PEC), HDR_64_BIT }, |
| 139 | [XE_OA_FORMAT_PEC32u64_G1] = { 5, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, |
| 140 | [XE_OA_FORMAT_PEC32u32_G1] = { 5, 192, DRM_FMT(PEC), HDR_64_BIT }, |
| 141 | [XE_OA_FORMAT_PEC32u64_G2] = { 6, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, |
| 142 | [XE_OA_FORMAT_PEC32u32_G2] = { 6, 192, DRM_FMT(PEC), HDR_64_BIT }, |
| 143 | [XE_OA_FORMAT_PEC36u64_G1_32_G2_4] = { 3, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, |
| 144 | [XE_OA_FORMAT_PEC36u64_G1_4_G2_32] = { 4, 320, DRM_FMT(PEC), HDR_64_BIT, 1, 0 }, |
| 145 | }; |
| 146 | |
| 147 | static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) |
| 148 | { |
| 149 | return tail >= head ? tail - head : |
| 150 | tail + stream->oa_buffer.circ_size - head; |
| 151 | } |
| 152 | |
| 153 | static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) |
| 154 | { |
| 155 | return ptr + n >= stream->oa_buffer.circ_size ? |
| 156 | ptr + n - stream->oa_buffer.circ_size : ptr + n; |
| 157 | } |
| 158 | |
| 159 | static void xe_oa_config_release(struct kref *ref) |
| 160 | { |
| 161 | struct xe_oa_config *oa_config = |
| 162 | container_of(ref, typeof(*oa_config), ref); |
| 163 | |
| 164 | kfree(objp: oa_config->regs); |
| 165 | |
| 166 | kfree_rcu(oa_config, rcu); |
| 167 | } |
| 168 | |
| 169 | static void xe_oa_config_put(struct xe_oa_config *oa_config) |
| 170 | { |
| 171 | if (!oa_config) |
| 172 | return; |
| 173 | |
| 174 | kref_put(kref: &oa_config->ref, release: xe_oa_config_release); |
| 175 | } |
| 176 | |
| 177 | static struct xe_oa_config *xe_oa_config_get(struct xe_oa_config *oa_config) |
| 178 | { |
| 179 | return kref_get_unless_zero(kref: &oa_config->ref) ? oa_config : NULL; |
| 180 | } |
| 181 | |
| 182 | static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_set) |
| 183 | { |
| 184 | struct xe_oa_config *oa_config; |
| 185 | |
| 186 | rcu_read_lock(); |
| 187 | oa_config = idr_find(&oa->metrics_idr, id: metrics_set); |
| 188 | if (oa_config) |
| 189 | oa_config = xe_oa_config_get(oa_config); |
| 190 | rcu_read_unlock(); |
| 191 | |
| 192 | return oa_config; |
| 193 | } |
| 194 | |
| 195 | static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence) |
| 196 | { |
| 197 | xe_oa_config_put(oa_config: oa_bo->oa_config); |
| 198 | xe_bb_free(bb: oa_bo->bb, fence: last_fence); |
| 199 | kfree(objp: oa_bo); |
| 200 | } |
| 201 | |
| 202 | static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream) |
| 203 | { |
| 204 | return &stream->oa_unit->regs; |
| 205 | } |
| 206 | |
| 207 | static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream) |
| 208 | { |
| 209 | return xe_mmio_read32(mmio: &stream->gt->mmio, reg: __oa_regs(stream)->oa_tail_ptr) & |
| 210 | OAG_OATAILPTR_MASK; |
| 211 | } |
| 212 | |
/* True when the OA format of stream @__s has its header field set to HDR_64_BIT */
#define oa_report_header_64bit(__s) \
	((__s)->oa_buffer.format->header == HDR_64_BIT)
| 215 | |
| 216 | static u64 oa_report_id(struct xe_oa_stream *stream, void *report) |
| 217 | { |
| 218 | return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; |
| 219 | } |
| 220 | |
| 221 | static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) |
| 222 | { |
| 223 | if (oa_report_header_64bit(stream)) |
| 224 | *(u64 *)report = 0; |
| 225 | else |
| 226 | *report = 0; |
| 227 | } |
| 228 | |
| 229 | static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) |
| 230 | { |
| 231 | return oa_report_header_64bit(stream) ? |
| 232 | *((u64 *)report + 1) : |
| 233 | *((u32 *)report + 1); |
| 234 | } |
| 235 | |
| 236 | static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) |
| 237 | { |
| 238 | if (oa_report_header_64bit(stream)) |
| 239 | *(u64 *)&report[2] = 0; |
| 240 | else |
| 241 | report[1] = 0; |
| 242 | } |
| 243 | |
| 244 | static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) |
| 245 | { |
| 246 | u32 gtt_offset = xe_bo_ggtt_addr(bo: stream->oa_buffer.bo); |
| 247 | u32 tail, hw_tail, partial_report_size, available; |
| 248 | int report_size = stream->oa_buffer.format->size; |
| 249 | unsigned long flags; |
| 250 | |
| 251 | spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); |
| 252 | |
| 253 | hw_tail = xe_oa_hw_tail_read(stream); |
| 254 | hw_tail -= gtt_offset; |
| 255 | |
| 256 | /* |
| 257 | * The tail pointer increases in 64 byte (cacheline size), not in report_size |
| 258 | * increments. Also report size may not be a power of 2. Compute potential |
| 259 | * partially landed report in OA buffer. |
| 260 | */ |
| 261 | partial_report_size = xe_oa_circ_diff(stream, tail: hw_tail, head: stream->oa_buffer.tail); |
| 262 | partial_report_size %= report_size; |
| 263 | |
| 264 | /* Subtract partial amount off the tail */ |
| 265 | hw_tail = xe_oa_circ_diff(stream, tail: hw_tail, head: partial_report_size); |
| 266 | |
| 267 | tail = hw_tail; |
| 268 | |
| 269 | /* |
| 270 | * Walk the stream backward until we find a report with report id and timestamp |
| 271 | * not 0. We can't tell whether a report has fully landed in memory before the |
| 272 | * report id and timestamp of the following report have landed. |
| 273 | * |
| 274 | * This is assuming that the writes of the OA unit land in memory in the order |
| 275 | * they were written. If not : (╯°□°)╯︵ ┻━┻ |
| 276 | */ |
| 277 | while (xe_oa_circ_diff(stream, tail, head: stream->oa_buffer.tail) >= report_size) { |
| 278 | void *report = stream->oa_buffer.vaddr + tail; |
| 279 | |
| 280 | if (oa_report_id(stream, report) || oa_timestamp(stream, report)) |
| 281 | break; |
| 282 | |
| 283 | tail = xe_oa_circ_diff(stream, tail, head: report_size); |
| 284 | } |
| 285 | |
| 286 | if (xe_oa_circ_diff(stream, tail: hw_tail, head: tail) > report_size) |
| 287 | drm_dbg(&stream->oa->xe->drm, |
| 288 | "unlanded report(s) head=0x%x tail=0x%x hw_tail=0x%x\n" , |
| 289 | stream->oa_buffer.head, tail, hw_tail); |
| 290 | |
| 291 | stream->oa_buffer.tail = tail; |
| 292 | |
| 293 | available = xe_oa_circ_diff(stream, tail: stream->oa_buffer.tail, head: stream->oa_buffer.head); |
| 294 | stream->pollin = available >= stream->wait_num_reports * report_size; |
| 295 | |
| 296 | spin_unlock_irqrestore(lock: &stream->oa_buffer.ptr_lock, flags); |
| 297 | |
| 298 | return stream->pollin; |
| 299 | } |
| 300 | |
| 301 | static enum hrtimer_restart xe_oa_poll_check_timer_cb(struct hrtimer *hrtimer) |
| 302 | { |
| 303 | struct xe_oa_stream *stream = |
| 304 | container_of(hrtimer, typeof(*stream), poll_check_timer); |
| 305 | |
| 306 | if (xe_oa_buffer_check_unlocked(stream)) |
| 307 | wake_up(&stream->poll_wq); |
| 308 | |
| 309 | hrtimer_forward_now(timer: hrtimer, interval: ns_to_ktime(ns: stream->poll_period_ns)); |
| 310 | |
| 311 | return HRTIMER_RESTART; |
| 312 | } |
| 313 | |
| 314 | static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, |
| 315 | size_t count, size_t *offset, const u8 *report) |
| 316 | { |
| 317 | int report_size = stream->oa_buffer.format->size; |
| 318 | int report_size_partial; |
| 319 | u8 *oa_buf_end; |
| 320 | |
| 321 | if ((count - *offset) < report_size) |
| 322 | return -ENOSPC; |
| 323 | |
| 324 | buf += *offset; |
| 325 | |
| 326 | oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; |
| 327 | report_size_partial = oa_buf_end - report; |
| 328 | |
| 329 | if (report_size_partial < report_size) { |
| 330 | if (copy_to_user(to: buf, from: report, n: report_size_partial)) |
| 331 | return -EFAULT; |
| 332 | buf += report_size_partial; |
| 333 | |
| 334 | if (copy_to_user(to: buf, from: stream->oa_buffer.vaddr, |
| 335 | n: report_size - report_size_partial)) |
| 336 | return -EFAULT; |
| 337 | } else if (copy_to_user(to: buf, from: report, n: report_size)) { |
| 338 | return -EFAULT; |
| 339 | } |
| 340 | |
| 341 | *offset += report_size; |
| 342 | |
| 343 | return 0; |
| 344 | } |
| 345 | |
| 346 | static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, |
| 347 | size_t count, size_t *offset) |
| 348 | { |
| 349 | int report_size = stream->oa_buffer.format->size; |
| 350 | u8 *oa_buf_base = stream->oa_buffer.vaddr; |
| 351 | u32 gtt_offset = xe_bo_ggtt_addr(bo: stream->oa_buffer.bo); |
| 352 | size_t start_offset = *offset; |
| 353 | unsigned long flags; |
| 354 | u32 head, tail; |
| 355 | int ret = 0; |
| 356 | |
| 357 | spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); |
| 358 | head = stream->oa_buffer.head; |
| 359 | tail = stream->oa_buffer.tail; |
| 360 | spin_unlock_irqrestore(lock: &stream->oa_buffer.ptr_lock, flags); |
| 361 | |
| 362 | xe_assert(stream->oa->xe, |
| 363 | head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); |
| 364 | |
| 365 | for (; xe_oa_circ_diff(stream, tail, head); |
| 366 | head = xe_oa_circ_incr(stream, ptr: head, n: report_size)) { |
| 367 | u8 *report = oa_buf_base + head; |
| 368 | |
| 369 | ret = xe_oa_append_report(stream, buf, count, offset, report); |
| 370 | if (ret) |
| 371 | break; |
| 372 | |
| 373 | if (!(stream->oa_buffer.circ_size % report_size)) { |
| 374 | /* Clear out report id and timestamp to detect unlanded reports */ |
| 375 | oa_report_id_clear(stream, report: (void *)report); |
| 376 | oa_timestamp_clear(stream, report: (void *)report); |
| 377 | } else { |
| 378 | u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; |
| 379 | u32 part = oa_buf_end - report; |
| 380 | |
| 381 | /* Zero out the entire report */ |
| 382 | if (report_size <= part) { |
| 383 | memset(report, 0, report_size); |
| 384 | } else { |
| 385 | memset(report, 0, part); |
| 386 | memset(oa_buf_base, 0, report_size - part); |
| 387 | } |
| 388 | } |
| 389 | } |
| 390 | |
| 391 | if (start_offset != *offset) { |
| 392 | struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; |
| 393 | |
| 394 | spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); |
| 395 | xe_mmio_write32(mmio: &stream->gt->mmio, reg: oaheadptr, |
| 396 | val: (head + gtt_offset) & OAG_OAHEADPTR_MASK); |
| 397 | stream->oa_buffer.head = head; |
| 398 | spin_unlock_irqrestore(lock: &stream->oa_buffer.ptr_lock, flags); |
| 399 | } |
| 400 | |
| 401 | return ret; |
| 402 | } |
| 403 | |
| 404 | static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) |
| 405 | { |
| 406 | u32 gtt_offset = xe_bo_ggtt_addr(bo: stream->oa_buffer.bo); |
| 407 | int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo)); |
| 408 | u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; |
| 409 | struct xe_mmio *mmio = &stream->gt->mmio; |
| 410 | unsigned long flags; |
| 411 | |
| 412 | /* |
| 413 | * If oa buffer size is more than 16MB (exponent greater than 24), the |
| 414 | * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. |
| 415 | */ |
| 416 | oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, |
| 417 | size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); |
| 418 | |
| 419 | spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); |
| 420 | |
| 421 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_status, val: 0); |
| 422 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_head_ptr, |
| 423 | val: gtt_offset & OAG_OAHEADPTR_MASK); |
| 424 | stream->oa_buffer.head = 0; |
| 425 | /* |
| 426 | * PRM says: "This MMIO must be set before the OATAILPTR register and after the |
| 427 | * OAHEADPTR register. This is to enable proper functionality of the overflow bit". |
| 428 | */ |
| 429 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_buffer, val: oa_buf); |
| 430 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_tail_ptr, |
| 431 | val: gtt_offset & OAG_OATAILPTR_MASK); |
| 432 | |
| 433 | /* Mark that we need updated tail pointer to read from */ |
| 434 | stream->oa_buffer.tail = 0; |
| 435 | |
| 436 | spin_unlock_irqrestore(lock: &stream->oa_buffer.ptr_lock, flags); |
| 437 | |
| 438 | /* Zero out the OA buffer since we rely on zero report id and timestamp fields */ |
| 439 | memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo)); |
| 440 | } |
| 441 | |
| 442 | static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask) |
| 443 | { |
| 444 | return ((format->counter_select << (ffs(counter_sel_mask) - 1)) & counter_sel_mask) | |
| 445 | REG_FIELD_PREP(OA_OACONTROL_REPORT_BC_MASK, format->bc_report) | |
| 446 | REG_FIELD_PREP(OA_OACONTROL_COUNTER_SIZE_MASK, format->counter_size); |
| 447 | } |
| 448 | |
| 449 | static u32 __oa_ccs_select(struct xe_oa_stream *stream) |
| 450 | { |
| 451 | u32 val; |
| 452 | |
| 453 | if (stream->hwe->class != XE_ENGINE_CLASS_COMPUTE) |
| 454 | return 0; |
| 455 | |
| 456 | val = REG_FIELD_PREP(OAG_OACONTROL_OA_CCS_SELECT_MASK, stream->hwe->instance); |
| 457 | xe_assert(stream->oa->xe, |
| 458 | REG_FIELD_GET(OAG_OACONTROL_OA_CCS_SELECT_MASK, val) == stream->hwe->instance); |
| 459 | return val; |
| 460 | } |
| 461 | |
| 462 | static u32 __oactrl_used_bits(struct xe_oa_stream *stream) |
| 463 | { |
| 464 | return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ? |
| 465 | OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS; |
| 466 | } |
| 467 | |
| 468 | static void xe_oa_enable(struct xe_oa_stream *stream) |
| 469 | { |
| 470 | const struct xe_oa_format *format = stream->oa_buffer.format; |
| 471 | const struct xe_oa_regs *regs; |
| 472 | u32 val; |
| 473 | |
| 474 | /* |
| 475 | * BSpec: 46822: Bit 0. Even if stream->sample is 0, for OAR to function, the OA |
| 476 | * buffer must be correctly initialized |
| 477 | */ |
| 478 | xe_oa_init_oa_buffer(stream); |
| 479 | |
| 480 | regs = __oa_regs(stream); |
| 481 | val = __format_to_oactrl(format, counter_sel_mask: regs->oa_ctrl_counter_select_mask) | |
| 482 | __oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE; |
| 483 | |
| 484 | if (GRAPHICS_VER(stream->oa->xe) >= 20 && |
| 485 | stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG) |
| 486 | val |= OAG_OACONTROL_OA_PES_DISAG_EN; |
| 487 | |
| 488 | xe_mmio_rmw32(mmio: &stream->gt->mmio, reg: regs->oa_ctrl, clr: __oactrl_used_bits(stream), set: val); |
| 489 | } |
| 490 | |
| 491 | static void xe_oa_disable(struct xe_oa_stream *stream) |
| 492 | { |
| 493 | struct xe_mmio *mmio = &stream->gt->mmio; |
| 494 | |
| 495 | xe_mmio_rmw32(mmio, reg: __oa_regs(stream)->oa_ctrl, clr: __oactrl_used_bits(stream), set: 0); |
| 496 | if (xe_mmio_wait32(mmio, reg: __oa_regs(stream)->oa_ctrl, |
| 497 | OAG_OACONTROL_OA_COUNTER_ENABLE, val: 0, timeout_us: 50000, NULL, atomic: false)) |
| 498 | drm_err(&stream->oa->xe->drm, |
| 499 | "wait for OA to be disabled timed out\n" ); |
| 500 | |
| 501 | if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) { |
| 502 | /* <= XE_METEORLAKE except XE_PVC */ |
| 503 | xe_mmio_write32(mmio, OA_TLB_INV_CR, val: 1); |
| 504 | if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, mask: 1, val: 0, timeout_us: 50000, NULL, atomic: false)) |
| 505 | drm_err(&stream->oa->xe->drm, |
| 506 | "wait for OA tlb invalidate timed out\n" ); |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) |
| 511 | { |
| 512 | /* We might wait indefinitely if periodic sampling is not enabled */ |
| 513 | if (!stream->periodic) |
| 514 | return -EINVAL; |
| 515 | |
| 516 | return wait_event_interruptible(stream->poll_wq, |
| 517 | xe_oa_buffer_check_unlocked(stream)); |
| 518 | } |
| 519 | |
/* OA status bits that __xe_oa_read() clears and reports to userspace as -EIO */
#define OASTATUS_RELEVANT_BITS \
	(OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \
	 OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST)
| 522 | |
| 523 | static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, |
| 524 | size_t count, size_t *offset) |
| 525 | { |
| 526 | /* Only clear our bits to avoid side-effects */ |
| 527 | stream->oa_status = xe_mmio_rmw32(mmio: &stream->gt->mmio, reg: __oa_regs(stream)->oa_status, |
| 528 | OASTATUS_RELEVANT_BITS, set: 0); |
| 529 | /* |
| 530 | * Signal to userspace that there is non-zero OA status to read via |
| 531 | * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl |
| 532 | */ |
| 533 | if (stream->oa_status & OASTATUS_RELEVANT_BITS) |
| 534 | return -EIO; |
| 535 | |
| 536 | return xe_oa_append_reports(stream, buf, count, offset); |
| 537 | } |
| 538 | |
| 539 | static ssize_t xe_oa_read(struct file *file, char __user *buf, |
| 540 | size_t count, loff_t *ppos) |
| 541 | { |
| 542 | struct xe_oa_stream *stream = file->private_data; |
| 543 | size_t offset = 0; |
| 544 | int ret; |
| 545 | |
| 546 | /* Can't read from disabled streams */ |
| 547 | if (!stream->enabled || !stream->sample) |
| 548 | return -EINVAL; |
| 549 | |
| 550 | if (!(file->f_flags & O_NONBLOCK)) { |
| 551 | do { |
| 552 | ret = xe_oa_wait_unlocked(stream); |
| 553 | if (ret) |
| 554 | return ret; |
| 555 | |
| 556 | mutex_lock(&stream->stream_lock); |
| 557 | ret = __xe_oa_read(stream, buf, count, offset: &offset); |
| 558 | mutex_unlock(lock: &stream->stream_lock); |
| 559 | } while (!offset && !ret); |
| 560 | } else { |
| 561 | xe_oa_buffer_check_unlocked(stream); |
| 562 | mutex_lock(&stream->stream_lock); |
| 563 | ret = __xe_oa_read(stream, buf, count, offset: &offset); |
| 564 | mutex_unlock(lock: &stream->stream_lock); |
| 565 | } |
| 566 | |
| 567 | /* |
| 568 | * Typically we clear pollin here in order to wait for the new hrtimer callback |
| 569 | * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC, |
| 570 | * which means that more OA data is available than could fit in the user provided |
| 571 | * buffer. In this case we want the next poll() call to not block. |
| 572 | * |
| 573 | * Also in case of -EIO, we have already waited for data before returning |
| 574 | * -EIO, so need to wait again |
| 575 | */ |
| 576 | if (ret != -ENOSPC && ret != -EIO) |
| 577 | stream->pollin = false; |
| 578 | |
| 579 | /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ |
| 580 | return offset ?: (ret ?: -EAGAIN); |
| 581 | } |
| 582 | |
| 583 | static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, |
| 584 | struct file *file, poll_table *wait) |
| 585 | { |
| 586 | __poll_t events = 0; |
| 587 | |
| 588 | poll_wait(filp: file, wait_address: &stream->poll_wq, p: wait); |
| 589 | |
| 590 | /* |
| 591 | * We don't explicitly check whether there's something to read here since this |
| 592 | * path may be hot depending on what else userspace is polling, or on the timeout |
| 593 | * in use. We rely on hrtimer xe_oa_poll_check_timer_cb to notify us when there |
| 594 | * are samples to read |
| 595 | */ |
| 596 | if (stream->pollin) |
| 597 | events |= EPOLLIN; |
| 598 | |
| 599 | return events; |
| 600 | } |
| 601 | |
| 602 | static __poll_t xe_oa_poll(struct file *file, poll_table *wait) |
| 603 | { |
| 604 | struct xe_oa_stream *stream = file->private_data; |
| 605 | __poll_t ret; |
| 606 | |
| 607 | mutex_lock(&stream->stream_lock); |
| 608 | ret = xe_oa_poll_locked(stream, file, wait); |
| 609 | mutex_unlock(lock: &stream->stream_lock); |
| 610 | |
| 611 | return ret; |
| 612 | } |
| 613 | |
| 614 | static void xe_oa_lock_vma(struct xe_exec_queue *q) |
| 615 | { |
| 616 | if (q->vm) { |
| 617 | down_read(sem: &q->vm->lock); |
| 618 | xe_vm_lock(vm: q->vm, intr: false); |
| 619 | } |
| 620 | } |
| 621 | |
| 622 | static void xe_oa_unlock_vma(struct xe_exec_queue *q) |
| 623 | { |
| 624 | if (q->vm) { |
| 625 | xe_vm_unlock(vm: q->vm); |
| 626 | up_read(sem: &q->vm->lock); |
| 627 | } |
| 628 | } |
| 629 | |
| 630 | static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps, |
| 631 | struct xe_bb *bb) |
| 632 | { |
| 633 | struct xe_exec_queue *q = stream->exec_q ?: stream->k_exec_q; |
| 634 | struct xe_sched_job *job; |
| 635 | struct dma_fence *fence; |
| 636 | int err = 0; |
| 637 | |
| 638 | xe_oa_lock_vma(q); |
| 639 | |
| 640 | job = xe_bb_create_job(q, bb); |
| 641 | if (IS_ERR(ptr: job)) { |
| 642 | err = PTR_ERR(ptr: job); |
| 643 | goto exit; |
| 644 | } |
| 645 | job->ggtt = true; |
| 646 | |
| 647 | if (deps == XE_OA_SUBMIT_ADD_DEPS) { |
| 648 | for (int i = 0; i < stream->num_syncs && !err; i++) |
| 649 | err = xe_sync_entry_add_deps(sync: &stream->syncs[i], job); |
| 650 | if (err) { |
| 651 | drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n" , err); |
| 652 | goto err_put_job; |
| 653 | } |
| 654 | } |
| 655 | |
| 656 | xe_sched_job_arm(job); |
| 657 | fence = dma_fence_get(fence: &job->drm.s_fence->finished); |
| 658 | xe_sched_job_push(job); |
| 659 | |
| 660 | xe_oa_unlock_vma(q); |
| 661 | |
| 662 | return fence; |
| 663 | err_put_job: |
| 664 | xe_sched_job_put(job); |
| 665 | exit: |
| 666 | xe_oa_unlock_vma(q); |
| 667 | return ERR_PTR(error: err); |
| 668 | } |
| 669 | |
| 670 | static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs) |
| 671 | { |
| 672 | u32 i; |
| 673 | |
| 674 | #define MI_LOAD_REGISTER_IMM_MAX_REGS (126) |
| 675 | |
| 676 | for (i = 0; i < n_regs; i++) { |
| 677 | if ((i % MI_LOAD_REGISTER_IMM_MAX_REGS) == 0) { |
| 678 | u32 n_lri = min_t(u32, n_regs - i, |
| 679 | MI_LOAD_REGISTER_IMM_MAX_REGS); |
| 680 | |
| 681 | bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(n_lri); |
| 682 | } |
| 683 | bb->cs[bb->len++] = reg_data[i].addr.addr; |
| 684 | bb->cs[bb->len++] = reg_data[i].value; |
| 685 | } |
| 686 | } |
| 687 | |
| 688 | static int num_lri_dwords(int num_regs) |
| 689 | { |
| 690 | int count = 0; |
| 691 | |
| 692 | if (num_regs > 0) { |
| 693 | count += DIV_ROUND_UP(num_regs, MI_LOAD_REGISTER_IMM_MAX_REGS); |
| 694 | count += num_regs * 2; |
| 695 | } |
| 696 | |
| 697 | return count; |
| 698 | } |
| 699 | |
| 700 | static void xe_oa_free_oa_buffer(struct xe_oa_stream *stream) |
| 701 | { |
| 702 | xe_bo_unpin_map_no_vm(bo: stream->oa_buffer.bo); |
| 703 | } |
| 704 | |
| 705 | static void xe_oa_free_configs(struct xe_oa_stream *stream) |
| 706 | { |
| 707 | struct xe_oa_config_bo *oa_bo, *tmp; |
| 708 | |
| 709 | xe_oa_config_put(oa_config: stream->oa_config); |
| 710 | llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node) |
| 711 | free_oa_config_bo(oa_bo, last_fence: stream->last_fence); |
| 712 | dma_fence_put(fence: stream->last_fence); |
| 713 | } |
| 714 | |
| 715 | static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri, u32 count) |
| 716 | { |
| 717 | struct dma_fence *fence; |
| 718 | struct xe_bb *bb; |
| 719 | int err; |
| 720 | |
| 721 | bb = xe_bb_new(gt: stream->gt, dwords: 2 * count + 1, usm: false); |
| 722 | if (IS_ERR(ptr: bb)) { |
| 723 | err = PTR_ERR(ptr: bb); |
| 724 | goto exit; |
| 725 | } |
| 726 | |
| 727 | write_cs_mi_lri(bb, reg_data: reg_lri, n_regs: count); |
| 728 | |
| 729 | fence = xe_oa_submit_bb(stream, deps: XE_OA_SUBMIT_NO_DEPS, bb); |
| 730 | if (IS_ERR(ptr: fence)) { |
| 731 | err = PTR_ERR(ptr: fence); |
| 732 | goto free_bb; |
| 733 | } |
| 734 | xe_bb_free(bb, fence); |
| 735 | dma_fence_put(fence); |
| 736 | |
| 737 | return 0; |
| 738 | free_bb: |
| 739 | xe_bb_free(bb, NULL); |
| 740 | exit: |
| 741 | return err; |
| 742 | } |
| 743 | |
| 744 | static int xe_oa_configure_oar_context(struct xe_oa_stream *stream, bool enable) |
| 745 | { |
| 746 | const struct xe_oa_format *format = stream->oa_buffer.format; |
| 747 | u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | |
| 748 | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); |
| 749 | |
| 750 | struct xe_oa_reg reg_lri[] = { |
| 751 | { |
| 752 | OACTXCONTROL(stream->hwe->mmio_base), |
| 753 | enable ? OA_COUNTER_RESUME : 0, |
| 754 | }, |
| 755 | { |
| 756 | OAR_OACONTROL, |
| 757 | oacontrol, |
| 758 | }, |
| 759 | { |
| 760 | RING_CONTEXT_CONTROL(stream->hwe->mmio_base), |
| 761 | _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, |
| 762 | enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) |
| 763 | }, |
| 764 | }; |
| 765 | |
| 766 | return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); |
| 767 | } |
| 768 | |
| 769 | static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable) |
| 770 | { |
| 771 | const struct xe_oa_format *format = stream->oa_buffer.format; |
| 772 | u32 oacontrol = __format_to_oactrl(format, OAR_OACONTROL_COUNTER_SEL_MASK) | |
| 773 | (enable ? OAR_OACONTROL_COUNTER_ENABLE : 0); |
| 774 | struct xe_oa_reg reg_lri[] = { |
| 775 | { |
| 776 | OACTXCONTROL(stream->hwe->mmio_base), |
| 777 | enable ? OA_COUNTER_RESUME : 0, |
| 778 | }, |
| 779 | { |
| 780 | OAC_OACONTROL, |
| 781 | oacontrol |
| 782 | }, |
| 783 | { |
| 784 | RING_CONTEXT_CONTROL(stream->hwe->mmio_base), |
| 785 | _MASKED_FIELD(CTX_CTRL_OAC_CONTEXT_ENABLE, |
| 786 | enable ? CTX_CTRL_OAC_CONTEXT_ENABLE : 0) | |
| 787 | _MASKED_FIELD(CTX_CTRL_RUN_ALONE, enable ? CTX_CTRL_RUN_ALONE : 0), |
| 788 | }, |
| 789 | }; |
| 790 | |
| 791 | /* Set ccs select to enable programming of OAC_OACONTROL */ |
| 792 | xe_mmio_write32(mmio: &stream->gt->mmio, reg: __oa_regs(stream)->oa_ctrl, |
| 793 | val: __oa_ccs_select(stream)); |
| 794 | |
| 795 | return xe_oa_load_with_lri(stream, reg_lri, ARRAY_SIZE(reg_lri)); |
| 796 | } |
| 797 | |
| 798 | static int xe_oa_configure_oa_context(struct xe_oa_stream *stream, bool enable) |
| 799 | { |
| 800 | switch (stream->hwe->class) { |
| 801 | case XE_ENGINE_CLASS_RENDER: |
| 802 | return xe_oa_configure_oar_context(stream, enable); |
| 803 | case XE_ENGINE_CLASS_COMPUTE: |
| 804 | return xe_oa_configure_oac_context(stream, enable); |
| 805 | default: |
| 806 | /* Video engines do not support MI_REPORT_PERF_COUNT */ |
| 807 | return 0; |
| 808 | } |
| 809 | } |
| 810 | |
/* OA BPC reporting is treated as present from graphics version 12.55 onwards */
#define HAS_OA_BPC_REPORTING(xe) (GRAPHICS_VERx100(xe) >= 1255)
| 812 | |
| 813 | static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool enable) |
| 814 | { |
| 815 | return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_MMIO_TRG, |
| 816 | enable && stream && stream->sample ? |
| 817 | 0 : OAG_OA_DEBUG_DISABLE_MMIO_TRG); |
| 818 | } |
| 819 | |
| 820 | static void xe_oa_disable_metric_set(struct xe_oa_stream *stream) |
| 821 | { |
| 822 | struct xe_mmio *mmio = &stream->gt->mmio; |
| 823 | u32 sqcnt1; |
| 824 | |
| 825 | /* Enable thread stall DOP gating and EU DOP gating. */ |
| 826 | if (XE_GT_WA(stream->gt, 1508761755)) { |
| 827 | xe_gt_mcr_multicast_write(gt: stream->gt, ROW_CHICKEN, |
| 828 | _MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE)); |
| 829 | xe_gt_mcr_multicast_write(gt: stream->gt, ROW_CHICKEN2, |
| 830 | _MASKED_BIT_DISABLE(DISABLE_DOP_GATING)); |
| 831 | } |
| 832 | |
| 833 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_debug, |
| 834 | val: oag_configure_mmio_trigger(stream, enable: false)); |
| 835 | |
| 836 | /* disable the context save/restore or OAR counters */ |
| 837 | if (stream->exec_q) |
| 838 | xe_oa_configure_oa_context(stream, enable: false); |
| 839 | |
| 840 | /* Make sure we disable noa to save power. */ |
| 841 | if (GT_VER(stream->gt) < 35) |
| 842 | xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, set: 0); |
| 843 | |
| 844 | sqcnt1 = SQCNT1_PMON_ENABLE | |
| 845 | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); |
| 846 | |
| 847 | /* Reset PMON Enable to save power. */ |
| 848 | xe_mmio_rmw32(mmio, XELPMP_SQCNT1, clr: sqcnt1, set: 0); |
| 849 | |
| 850 | if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || |
| 851 | stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && |
| 852 | GRAPHICS_VER(stream->oa->xe) >= 30) |
| 853 | xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, set: 0); |
| 854 | } |
| 855 | |
| 856 | static void xe_oa_stream_destroy(struct xe_oa_stream *stream) |
| 857 | { |
| 858 | struct xe_oa_unit *u = stream->oa_unit; |
| 859 | struct xe_gt *gt = stream->hwe->gt; |
| 860 | |
| 861 | if (WARN_ON(stream != u->exclusive_stream)) |
| 862 | return; |
| 863 | |
| 864 | WRITE_ONCE(u->exclusive_stream, NULL); |
| 865 | |
| 866 | mutex_destroy(lock: &stream->stream_lock); |
| 867 | |
| 868 | xe_oa_disable_metric_set(stream); |
| 869 | xe_exec_queue_put(q: stream->k_exec_q); |
| 870 | |
| 871 | xe_oa_free_oa_buffer(stream); |
| 872 | |
| 873 | xe_force_wake_put(fw: gt_to_fw(gt), fw_ref: stream->fw_ref); |
| 874 | xe_pm_runtime_put(xe: stream->oa->xe); |
| 875 | |
| 876 | /* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */ |
| 877 | if (stream->override_gucrc) |
| 878 | xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); |
| 879 | |
| 880 | xe_oa_free_configs(stream); |
| 881 | xe_file_put(xef: stream->xef); |
| 882 | } |
| 883 | |
| 884 | static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) |
| 885 | { |
| 886 | struct xe_bo *bo; |
| 887 | |
| 888 | bo = xe_bo_create_pin_map_novm(xe: stream->oa->xe, tile: stream->gt->tile, |
| 889 | size, type: ttm_bo_type_kernel, |
| 890 | XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, intr: false); |
| 891 | if (IS_ERR(ptr: bo)) |
| 892 | return PTR_ERR(ptr: bo); |
| 893 | |
| 894 | stream->oa_buffer.bo = bo; |
| 895 | /* mmap implementation requires OA buffer to be in system memory */ |
| 896 | xe_assert(stream->oa->xe, bo->vmap.is_iomem == 0); |
| 897 | stream->oa_buffer.vaddr = bo->vmap.vaddr; |
| 898 | return 0; |
| 899 | } |
| 900 | |
| 901 | static struct xe_oa_config_bo * |
| 902 | __xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) |
| 903 | { |
| 904 | struct xe_oa_config_bo *oa_bo; |
| 905 | size_t config_length; |
| 906 | struct xe_bb *bb; |
| 907 | |
| 908 | oa_bo = kzalloc(sizeof(*oa_bo), GFP_KERNEL); |
| 909 | if (!oa_bo) |
| 910 | return ERR_PTR(error: -ENOMEM); |
| 911 | |
| 912 | config_length = num_lri_dwords(num_regs: oa_config->regs_len); |
| 913 | config_length = ALIGN(sizeof(u32) * config_length, XE_PAGE_SIZE) / sizeof(u32); |
| 914 | |
| 915 | bb = xe_bb_new(gt: stream->gt, dwords: config_length, usm: false); |
| 916 | if (IS_ERR(ptr: bb)) |
| 917 | goto err_free; |
| 918 | |
| 919 | write_cs_mi_lri(bb, reg_data: oa_config->regs, n_regs: oa_config->regs_len); |
| 920 | |
| 921 | oa_bo->bb = bb; |
| 922 | oa_bo->oa_config = xe_oa_config_get(oa_config); |
| 923 | llist_add(new: &oa_bo->node, head: &stream->oa_config_bos); |
| 924 | |
| 925 | return oa_bo; |
| 926 | err_free: |
| 927 | kfree(objp: oa_bo); |
| 928 | return ERR_CAST(ptr: bb); |
| 929 | } |
| 930 | |
| 931 | static struct xe_oa_config_bo * |
| 932 | xe_oa_alloc_config_buffer(struct xe_oa_stream *stream, struct xe_oa_config *oa_config) |
| 933 | { |
| 934 | struct xe_oa_config_bo *oa_bo; |
| 935 | |
| 936 | /* Look for the buffer in the already allocated BOs attached to the stream */ |
| 937 | llist_for_each_entry(oa_bo, stream->oa_config_bos.first, node) { |
| 938 | if (oa_bo->oa_config == oa_config && |
| 939 | memcmp(p: oa_bo->oa_config->uuid, q: oa_config->uuid, |
| 940 | size: sizeof(oa_config->uuid)) == 0) |
| 941 | goto out; |
| 942 | } |
| 943 | |
| 944 | oa_bo = __xe_oa_alloc_config_buffer(stream, oa_config); |
| 945 | out: |
| 946 | return oa_bo; |
| 947 | } |
| 948 | |
| 949 | static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence) |
| 950 | { |
| 951 | dma_fence_put(fence: stream->last_fence); |
| 952 | stream->last_fence = dma_fence_get(fence); |
| 953 | } |
| 954 | |
| 955 | static void xe_oa_fence_work_fn(struct work_struct *w) |
| 956 | { |
| 957 | struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work); |
| 958 | |
| 959 | /* Signal fence to indicate new OA configuration is active */ |
| 960 | dma_fence_signal(fence: &ofence->base); |
| 961 | dma_fence_put(fence: &ofence->base); |
| 962 | } |
| 963 | |
| 964 | static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb) |
| 965 | { |
| 966 | /* Additional empirical delay needed for NOA programming after registers are written */ |
| 967 | #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 |
| 968 | |
| 969 | struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb); |
| 970 | |
| 971 | INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn); |
| 972 | queue_delayed_work(wq: system_unbound_wq, dwork: &ofence->work, |
| 973 | delay: usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US)); |
| 974 | dma_fence_put(fence); |
| 975 | } |
| 976 | |
/* dma_fence_ops callback: driver name reported for OA fences */
static const char *xe_oa_get_driver_name(struct dma_fence *fence)
{
	static const char driver_name[] = "xe_oa";

	return driver_name;
}
| 981 | |
/* dma_fence_ops callback: timeline name reported for OA fences */
static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
{
	static const char timeline_name[] = "unbound";

	return timeline_name;
}
| 986 | |
/* Fence ops for OA fences: only the two name callbacks are provided */
static const struct dma_fence_ops xe_oa_fence_ops = {
	.get_driver_name = xe_oa_get_driver_name,
	.get_timeline_name = xe_oa_get_timeline_name,
};
| 991 | |
| 992 | static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config) |
| 993 | { |
| 994 | #define NOA_PROGRAM_ADDITIONAL_DELAY_US 500 |
| 995 | struct xe_oa_config_bo *oa_bo; |
| 996 | struct xe_oa_fence *ofence; |
| 997 | int i, err, num_signal = 0; |
| 998 | struct dma_fence *fence; |
| 999 | |
| 1000 | ofence = kzalloc(sizeof(*ofence), GFP_KERNEL); |
| 1001 | if (!ofence) { |
| 1002 | err = -ENOMEM; |
| 1003 | goto exit; |
| 1004 | } |
| 1005 | |
| 1006 | oa_bo = xe_oa_alloc_config_buffer(stream, oa_config: config); |
| 1007 | if (IS_ERR(ptr: oa_bo)) { |
| 1008 | err = PTR_ERR(ptr: oa_bo); |
| 1009 | goto exit; |
| 1010 | } |
| 1011 | |
| 1012 | /* Emit OA configuration batch */ |
| 1013 | fence = xe_oa_submit_bb(stream, deps: XE_OA_SUBMIT_ADD_DEPS, bb: oa_bo->bb); |
| 1014 | if (IS_ERR(ptr: fence)) { |
| 1015 | err = PTR_ERR(ptr: fence); |
| 1016 | goto exit; |
| 1017 | } |
| 1018 | |
| 1019 | /* Point of no return: initialize and set fence to signal */ |
| 1020 | spin_lock_init(&ofence->lock); |
| 1021 | dma_fence_init(fence: &ofence->base, ops: &xe_oa_fence_ops, lock: &ofence->lock, context: 0, seqno: 0); |
| 1022 | |
| 1023 | for (i = 0; i < stream->num_syncs; i++) { |
| 1024 | if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL) |
| 1025 | num_signal++; |
| 1026 | xe_sync_entry_signal(sync: &stream->syncs[i], fence: &ofence->base); |
| 1027 | } |
| 1028 | |
| 1029 | /* Additional dma_fence_get in case we dma_fence_wait */ |
| 1030 | if (!num_signal) |
| 1031 | dma_fence_get(fence: &ofence->base); |
| 1032 | |
| 1033 | /* Update last fence too before adding callback */ |
| 1034 | xe_oa_update_last_fence(stream, fence); |
| 1035 | |
| 1036 | /* Add job fence callback to schedule work to signal ofence->base */ |
| 1037 | err = dma_fence_add_callback(fence, cb: &ofence->cb, func: xe_oa_config_cb); |
| 1038 | xe_gt_assert(stream->gt, !err || err == -ENOENT); |
| 1039 | if (err == -ENOENT) |
| 1040 | xe_oa_config_cb(fence, cb: &ofence->cb); |
| 1041 | |
| 1042 | /* If nothing needs to be signaled we wait synchronously */ |
| 1043 | if (!num_signal) { |
| 1044 | dma_fence_wait(fence: &ofence->base, intr: false); |
| 1045 | dma_fence_put(fence: &ofence->base); |
| 1046 | } |
| 1047 | |
| 1048 | /* Done with syncs */ |
| 1049 | for (i = 0; i < stream->num_syncs; i++) |
| 1050 | xe_sync_entry_cleanup(sync: &stream->syncs[i]); |
| 1051 | kfree(objp: stream->syncs); |
| 1052 | |
| 1053 | return 0; |
| 1054 | exit: |
| 1055 | kfree(objp: ofence); |
| 1056 | return err; |
| 1057 | } |
| 1058 | |
| 1059 | static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream) |
| 1060 | { |
| 1061 | /* If user didn't require OA reports, ask HW not to emit ctx switch reports */ |
| 1062 | return _MASKED_FIELD(OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS, |
| 1063 | stream->sample ? |
| 1064 | 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); |
| 1065 | } |
| 1066 | |
| 1067 | static u32 oag_buf_size_select(const struct xe_oa_stream *stream) |
| 1068 | { |
| 1069 | return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, |
| 1070 | xe_bo_size(stream->oa_buffer.bo) > SZ_16M ? |
| 1071 | OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); |
| 1072 | } |
| 1073 | |
| 1074 | static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) |
| 1075 | { |
| 1076 | struct xe_mmio *mmio = &stream->gt->mmio; |
| 1077 | u32 oa_debug, sqcnt1; |
| 1078 | int ret; |
| 1079 | |
| 1080 | /* |
| 1081 | * EU NOA signals behave incorrectly if EU clock gating is enabled. |
| 1082 | * Disable thread stall DOP gating and EU DOP gating. |
| 1083 | */ |
| 1084 | if (XE_GT_WA(stream->gt, 1508761755)) { |
| 1085 | xe_gt_mcr_multicast_write(gt: stream->gt, ROW_CHICKEN, |
| 1086 | _MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE)); |
| 1087 | xe_gt_mcr_multicast_write(gt: stream->gt, ROW_CHICKEN2, |
| 1088 | _MASKED_BIT_ENABLE(DISABLE_DOP_GATING)); |
| 1089 | } |
| 1090 | |
| 1091 | /* Disable clk ratio reports */ |
| 1092 | oa_debug = OAG_OA_DEBUG_DISABLE_CLK_RATIO_REPORTS | |
| 1093 | OAG_OA_DEBUG_INCLUDE_CLK_RATIO; |
| 1094 | |
| 1095 | if (GRAPHICS_VER(stream->oa->xe) >= 20) |
| 1096 | oa_debug |= |
| 1097 | /* The three bits below are needed to get PEC counters running */ |
| 1098 | OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL | |
| 1099 | OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL | |
| 1100 | OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL; |
| 1101 | |
| 1102 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_debug, |
| 1103 | _MASKED_BIT_ENABLE(oa_debug) | |
| 1104 | oag_report_ctx_switches(stream) | |
| 1105 | oag_buf_size_select(stream) | |
| 1106 | oag_configure_mmio_trigger(stream, enable: true)); |
| 1107 | |
| 1108 | xe_mmio_write32(mmio, reg: __oa_regs(stream)->oa_ctx_ctrl, |
| 1109 | OAG_OAGLBCTXCTRL_COUNTER_RESUME | |
| 1110 | (stream->periodic ? |
| 1111 | OAG_OAGLBCTXCTRL_TIMER_ENABLE | |
| 1112 | REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK, |
| 1113 | stream->period_exponent) : 0)); |
| 1114 | |
| 1115 | /* |
| 1116 | * Initialize Super Queue Internal Cnt Register |
| 1117 | * Set PMON Enable in order to collect valid metrics |
| 1118 | * Enable bytes per clock reporting |
| 1119 | */ |
| 1120 | sqcnt1 = SQCNT1_PMON_ENABLE | |
| 1121 | (HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0); |
| 1122 | xe_mmio_rmw32(mmio, XELPMP_SQCNT1, clr: 0, set: sqcnt1); |
| 1123 | |
| 1124 | if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM || |
| 1125 | stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) && |
| 1126 | GRAPHICS_VER(stream->oa->xe) >= 30) |
| 1127 | xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, clr: 0, OAM_LAT_MEASURE_ENABLE); |
| 1128 | |
| 1129 | /* Configure OAR/OAC */ |
| 1130 | if (stream->exec_q) { |
| 1131 | ret = xe_oa_configure_oa_context(stream, enable: true); |
| 1132 | if (ret) |
| 1133 | return ret; |
| 1134 | } |
| 1135 | |
| 1136 | return xe_oa_emit_oa_config(stream, config: stream->oa_config); |
| 1137 | } |
| 1138 | |
| 1139 | static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name) |
| 1140 | { |
| 1141 | u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt); |
| 1142 | u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt); |
| 1143 | u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); |
| 1144 | u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt); |
| 1145 | int idx; |
| 1146 | |
| 1147 | for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) { |
| 1148 | const struct xe_oa_format *f = &oa->oa_formats[idx]; |
| 1149 | |
| 1150 | if (counter_size == f->counter_size && bc_report == f->bc_report && |
| 1151 | type == f->type && counter_sel == f->counter_select) { |
| 1152 | *name = idx; |
| 1153 | return 0; |
| 1154 | } |
| 1155 | } |
| 1156 | |
| 1157 | return -EINVAL; |
| 1158 | } |
| 1159 | |
| 1160 | static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id) |
| 1161 | { |
| 1162 | struct xe_gt *gt; |
| 1163 | int gt_id, i; |
| 1164 | |
| 1165 | for_each_gt(gt, oa->xe, gt_id) { |
| 1166 | for (i = 0; i < gt->oa.num_oa_units; i++) { |
| 1167 | struct xe_oa_unit *u = >->oa.oa_unit[i]; |
| 1168 | |
| 1169 | if (u->oa_unit_id == oa_unit_id) |
| 1170 | return u; |
| 1171 | } |
| 1172 | } |
| 1173 | |
| 1174 | return NULL; |
| 1175 | } |
| 1176 | |
| 1177 | static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value, |
| 1178 | struct xe_oa_open_param *param) |
| 1179 | { |
| 1180 | param->oa_unit = xe_oa_lookup_oa_unit(oa, oa_unit_id: value); |
| 1181 | if (!param->oa_unit) { |
| 1182 | drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n" , value); |
| 1183 | return -EINVAL; |
| 1184 | } |
| 1185 | return 0; |
| 1186 | } |
| 1187 | |
/* Property handler for DRM_XE_OA_PROPERTY_SAMPLE_OA: store @value in param->sample */
static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	param->sample = value;
	return 0;
}
| 1194 | |
/*
 * Property handler for DRM_XE_OA_PROPERTY_OA_METRIC_SET: store @value in
 * param->metric_set. Used by both the open and the config property tables.
 */
static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	param->metric_set = value;
	return 0;
}
| 1201 | |
| 1202 | static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value, |
| 1203 | struct xe_oa_open_param *param) |
| 1204 | { |
| 1205 | int ret = decode_oa_format(oa, fmt: value, name: ¶m->oa_format); |
| 1206 | |
| 1207 | if (ret) { |
| 1208 | drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n" , value); |
| 1209 | return ret; |
| 1210 | } |
| 1211 | return 0; |
| 1212 | } |
| 1213 | |
| 1214 | static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value, |
| 1215 | struct xe_oa_open_param *param) |
| 1216 | { |
| 1217 | #define OA_EXPONENT_MAX 31 |
| 1218 | |
| 1219 | if (value > OA_EXPONENT_MAX) { |
| 1220 | drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n" , OA_EXPONENT_MAX); |
| 1221 | return -EINVAL; |
| 1222 | } |
| 1223 | param->period_exponent = value; |
| 1224 | return 0; |
| 1225 | } |
| 1226 | |
/* Property handler for DRM_XE_OA_PROPERTY_OA_DISABLED: store @value in param->disabled */
static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
				   struct xe_oa_open_param *param)
{
	param->disabled = value;
	return 0;
}
| 1233 | |
/* Property handler for DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: store @value in param->exec_queue_id */
static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
					struct xe_oa_open_param *param)
{
	param->exec_queue_id = value;
	return 0;
}
| 1240 | |
/*
 * Property handler for DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE: store @value in
 * param->engine_instance.
 */
static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
					  struct xe_oa_open_param *param)
{
	param->engine_instance = value;
	return 0;
}
| 1247 | |
/* Property handler for DRM_XE_OA_PROPERTY_NO_PREEMPT: store @value in param->no_preempt */
static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
				struct xe_oa_open_param *param)
{
	param->no_preempt = value;
	return 0;
}
| 1254 | |
/*
 * Property handler for DRM_XE_OA_PROPERTY_NUM_SYNCS: store the number of sync
 * entries in param->num_syncs. Returns -EINVAL when @value exceeds
 * DRM_XE_MAX_SYNCS.
 */
static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	if (XE_IOCTL_DBG(oa->xe, value > DRM_XE_MAX_SYNCS))
		return -EINVAL;

	param->num_syncs = value;
	return 0;
}
| 1264 | |
/*
 * Property handler for DRM_XE_OA_PROPERTY_SYNCS: convert @value to a user
 * pointer and store it in param->syncs_user. The pointer is not validated here.
 */
static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param)
{
	param->syncs_user = u64_to_user_ptr(value);
	return 0;
}
| 1271 | |
| 1272 | static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, |
| 1273 | struct xe_oa_open_param *param) |
| 1274 | { |
| 1275 | if (!is_power_of_2(n: value) || value < SZ_128K || value > SZ_128M) { |
| 1276 | drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n" , value); |
| 1277 | return -EINVAL; |
| 1278 | } |
| 1279 | param->oa_buffer_size = value; |
| 1280 | return 0; |
| 1281 | } |
| 1282 | |
| 1283 | static int xe_oa_set_prop_wait_num_reports(struct xe_oa *oa, u64 value, |
| 1284 | struct xe_oa_open_param *param) |
| 1285 | { |
| 1286 | if (!value) { |
| 1287 | drm_dbg(&oa->xe->drm, "wait_num_reports %llu\n" , value); |
| 1288 | return -EINVAL; |
| 1289 | } |
| 1290 | param->wait_num_reports = value; |
| 1291 | return 0; |
| 1292 | } |
| 1293 | |
/*
 * Property handler that unconditionally fails with -EINVAL. The config
 * property table uses it for the properties it does not accept.
 */
static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
				    struct xe_oa_open_param *param)
{
	return -EINVAL;
}
| 1299 | |
/* Signature shared by all property handlers: validate @value and record it in @param */
typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
				     struct xe_oa_open_param *param);
/*
 * Property handlers used for XE_OA_USER_EXTN_FROM_OPEN, indexed by property
 * id. Must stay the same size as xe_oa_set_property_funcs_config (enforced by
 * a BUILD_BUG_ON in xe_oa_user_ext_set_property()).
 */
static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
	[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
	[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
	[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
	[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size,
	[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_wait_num_reports,
};
| 1317 | |
/*
 * Property handlers used for XE_OA_USER_EXTN_FROM_CONFIG, indexed by property
 * id. Only the metric set and the sync properties are accepted; every other
 * property is rejected with -EINVAL.
 */
static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
	[DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
	[DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
	[DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval,
	[DRM_XE_OA_PROPERTY_WAIT_NUM_REPORTS] = xe_oa_set_prop_ret_inval,
};
| 1333 | |
| 1334 | static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, |
| 1335 | u64 extension, struct xe_oa_open_param *param) |
| 1336 | { |
| 1337 | u64 __user *address = u64_to_user_ptr(extension); |
| 1338 | struct drm_xe_ext_set_property ext; |
| 1339 | int err; |
| 1340 | u32 idx; |
| 1341 | |
| 1342 | err = copy_from_user(to: &ext, from: address, n: sizeof(ext)); |
| 1343 | if (XE_IOCTL_DBG(oa->xe, err)) |
| 1344 | return -EFAULT; |
| 1345 | |
| 1346 | BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) != |
| 1347 | ARRAY_SIZE(xe_oa_set_property_funcs_config)); |
| 1348 | |
| 1349 | if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) || |
| 1350 | XE_IOCTL_DBG(oa->xe, !ext.property) || XE_IOCTL_DBG(oa->xe, ext.pad)) |
| 1351 | return -EINVAL; |
| 1352 | |
| 1353 | idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open)); |
| 1354 | |
| 1355 | if (from == XE_OA_USER_EXTN_FROM_CONFIG) |
| 1356 | return xe_oa_set_property_funcs_config[idx](oa, ext.value, param); |
| 1357 | else |
| 1358 | return xe_oa_set_property_funcs_open[idx](oa, ext.value, param); |
| 1359 | } |
| 1360 | |
/* Signature of a user extension handler; @extension is the user address of the extension */
typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from,
				       u64 extension, struct xe_oa_open_param *param);
/* User extension handlers, indexed by the extension name (id) supplied by user space */
static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
	[DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
};
| 1366 | |
| 1367 | #define MAX_USER_EXTENSIONS 16 |
| 1368 | static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension, |
| 1369 | int ext_number, struct xe_oa_open_param *param) |
| 1370 | { |
| 1371 | u64 __user *address = u64_to_user_ptr(extension); |
| 1372 | struct drm_xe_user_extension ext; |
| 1373 | int err; |
| 1374 | u32 idx; |
| 1375 | |
| 1376 | if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS)) |
| 1377 | return -E2BIG; |
| 1378 | |
| 1379 | err = copy_from_user(to: &ext, from: address, n: sizeof(ext)); |
| 1380 | if (XE_IOCTL_DBG(oa->xe, err)) |
| 1381 | return -EFAULT; |
| 1382 | |
| 1383 | if (XE_IOCTL_DBG(oa->xe, ext.pad) || |
| 1384 | XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs))) |
| 1385 | return -EINVAL; |
| 1386 | |
| 1387 | idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs)); |
| 1388 | err = xe_oa_user_extension_funcs[idx](oa, from, extension, param); |
| 1389 | if (XE_IOCTL_DBG(oa->xe, err)) |
| 1390 | return err; |
| 1391 | |
| 1392 | if (ext.next_extension) |
| 1393 | return xe_oa_user_extensions(oa, from, extension: ext.next_extension, ext_number: ++ext_number, param); |
| 1394 | |
| 1395 | return 0; |
| 1396 | } |
| 1397 | |
| 1398 | static int xe_oa_parse_syncs(struct xe_oa *oa, |
| 1399 | struct xe_oa_stream *stream, |
| 1400 | struct xe_oa_open_param *param) |
| 1401 | { |
| 1402 | int ret, num_syncs, num_ufence = 0; |
| 1403 | |
| 1404 | if (param->num_syncs && !param->syncs_user) { |
| 1405 | drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n" ); |
| 1406 | ret = -EINVAL; |
| 1407 | goto exit; |
| 1408 | } |
| 1409 | |
| 1410 | if (param->num_syncs) { |
| 1411 | param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL); |
| 1412 | if (!param->syncs) { |
| 1413 | ret = -ENOMEM; |
| 1414 | goto exit; |
| 1415 | } |
| 1416 | } |
| 1417 | |
| 1418 | for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) { |
| 1419 | ret = xe_sync_entry_parse(xe: oa->xe, xef: param->xef, sync: ¶m->syncs[num_syncs], |
| 1420 | sync_user: ¶m->syncs_user[num_syncs], |
| 1421 | ufence_syncobj: stream->ufence_syncobj, |
| 1422 | ufence_timeline_value: ++stream->ufence_timeline_value, flags: 0); |
| 1423 | if (ret) |
| 1424 | goto err_syncs; |
| 1425 | |
| 1426 | if (xe_sync_is_ufence(sync: ¶m->syncs[num_syncs])) |
| 1427 | num_ufence++; |
| 1428 | } |
| 1429 | |
| 1430 | if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) { |
| 1431 | ret = -EINVAL; |
| 1432 | goto err_syncs; |
| 1433 | } |
| 1434 | |
| 1435 | return 0; |
| 1436 | |
| 1437 | err_syncs: |
| 1438 | while (num_syncs--) |
| 1439 | xe_sync_entry_cleanup(sync: ¶m->syncs[num_syncs]); |
| 1440 | kfree(objp: param->syncs); |
| 1441 | exit: |
| 1442 | return ret; |
| 1443 | } |
| 1444 | |
| 1445 | static void xe_oa_stream_enable(struct xe_oa_stream *stream) |
| 1446 | { |
| 1447 | stream->pollin = false; |
| 1448 | |
| 1449 | xe_oa_enable(stream); |
| 1450 | |
| 1451 | if (stream->sample) |
| 1452 | hrtimer_start(timer: &stream->poll_check_timer, |
| 1453 | tim: ns_to_ktime(ns: stream->poll_period_ns), |
| 1454 | mode: HRTIMER_MODE_REL_PINNED); |
| 1455 | } |
| 1456 | |
| 1457 | static void xe_oa_stream_disable(struct xe_oa_stream *stream) |
| 1458 | { |
| 1459 | xe_oa_disable(stream); |
| 1460 | |
| 1461 | if (stream->sample) |
| 1462 | hrtimer_cancel(timer: &stream->poll_check_timer); |
| 1463 | } |
| 1464 | |
| 1465 | static int xe_oa_enable_preempt_timeslice(struct xe_oa_stream *stream) |
| 1466 | { |
| 1467 | struct xe_exec_queue *q = stream->exec_q; |
| 1468 | int ret1, ret2; |
| 1469 | |
| 1470 | /* Best effort recovery: try to revert both to original, irrespective of error */ |
| 1471 | ret1 = q->ops->set_timeslice(q, stream->hwe->eclass->sched_props.timeslice_us); |
| 1472 | ret2 = q->ops->set_preempt_timeout(q, stream->hwe->eclass->sched_props.preempt_timeout_us); |
| 1473 | if (ret1 || ret2) |
| 1474 | goto err; |
| 1475 | return 0; |
| 1476 | err: |
| 1477 | drm_dbg(&stream->oa->xe->drm, "%s failed ret1 %d ret2 %d\n" , __func__, ret1, ret2); |
| 1478 | return ret1 ?: ret2; |
| 1479 | } |
| 1480 | |
| 1481 | static int xe_oa_disable_preempt_timeslice(struct xe_oa_stream *stream) |
| 1482 | { |
| 1483 | struct xe_exec_queue *q = stream->exec_q; |
| 1484 | int ret; |
| 1485 | |
| 1486 | /* Setting values to 0 will disable timeslice and preempt_timeout */ |
| 1487 | ret = q->ops->set_timeslice(q, 0); |
| 1488 | if (ret) |
| 1489 | goto err; |
| 1490 | |
| 1491 | ret = q->ops->set_preempt_timeout(q, 0); |
| 1492 | if (ret) |
| 1493 | goto err; |
| 1494 | |
| 1495 | return 0; |
| 1496 | err: |
| 1497 | xe_oa_enable_preempt_timeslice(stream); |
| 1498 | drm_dbg(&stream->oa->xe->drm, "%s failed %d\n" , __func__, ret); |
| 1499 | return ret; |
| 1500 | } |
| 1501 | |
| 1502 | static int xe_oa_enable_locked(struct xe_oa_stream *stream) |
| 1503 | { |
| 1504 | if (stream->enabled) |
| 1505 | return 0; |
| 1506 | |
| 1507 | if (stream->no_preempt) { |
| 1508 | int ret = xe_oa_disable_preempt_timeslice(stream); |
| 1509 | |
| 1510 | if (ret) |
| 1511 | return ret; |
| 1512 | } |
| 1513 | |
| 1514 | xe_oa_stream_enable(stream); |
| 1515 | |
| 1516 | stream->enabled = true; |
| 1517 | return 0; |
| 1518 | } |
| 1519 | |
| 1520 | static int xe_oa_disable_locked(struct xe_oa_stream *stream) |
| 1521 | { |
| 1522 | int ret = 0; |
| 1523 | |
| 1524 | if (!stream->enabled) |
| 1525 | return 0; |
| 1526 | |
| 1527 | xe_oa_stream_disable(stream); |
| 1528 | |
| 1529 | if (stream->no_preempt) |
| 1530 | ret = xe_oa_enable_preempt_timeslice(stream); |
| 1531 | |
| 1532 | stream->enabled = false; |
| 1533 | return ret; |
| 1534 | } |
| 1535 | |
| 1536 | static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg) |
| 1537 | { |
| 1538 | struct xe_oa_open_param param = {}; |
| 1539 | long ret = stream->oa_config->id; |
| 1540 | struct xe_oa_config *config; |
| 1541 | int err; |
| 1542 | |
| 1543 | err = xe_oa_user_extensions(oa: stream->oa, from: XE_OA_USER_EXTN_FROM_CONFIG, extension: arg, ext_number: 0, param: ¶m); |
| 1544 | if (err) |
| 1545 | return err; |
| 1546 | |
| 1547 | config = xe_oa_get_oa_config(oa: stream->oa, metrics_set: param.metric_set); |
| 1548 | if (!config) |
| 1549 | return -ENODEV; |
| 1550 | |
| 1551 | param.xef = stream->xef; |
| 1552 | err = xe_oa_parse_syncs(oa: stream->oa, stream, param: ¶m); |
| 1553 | if (err) |
| 1554 | goto err_config_put; |
| 1555 | |
| 1556 | stream->num_syncs = param.num_syncs; |
| 1557 | stream->syncs = param.syncs; |
| 1558 | |
| 1559 | err = xe_oa_emit_oa_config(stream, config); |
| 1560 | if (!err) { |
| 1561 | config = xchg(&stream->oa_config, config); |
| 1562 | drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n" , |
| 1563 | stream->oa_config->uuid); |
| 1564 | } |
| 1565 | |
| 1566 | err_config_put: |
| 1567 | xe_oa_config_put(oa_config: config); |
| 1568 | |
| 1569 | return err ?: ret; |
| 1570 | } |
| 1571 | |
| 1572 | static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) |
| 1573 | { |
| 1574 | struct drm_xe_oa_stream_status status = {}; |
| 1575 | void __user *uaddr = (void __user *)arg; |
| 1576 | |
| 1577 | /* Map from register to uapi bits */ |
| 1578 | if (stream->oa_status & OASTATUS_REPORT_LOST) |
| 1579 | status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; |
| 1580 | if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) |
| 1581 | status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; |
| 1582 | if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) |
| 1583 | status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; |
| 1584 | if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) |
| 1585 | status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; |
| 1586 | |
| 1587 | if (copy_to_user(to: uaddr, from: &status, n: sizeof(status))) |
| 1588 | return -EFAULT; |
| 1589 | |
| 1590 | return 0; |
| 1591 | } |
| 1592 | |
| 1593 | static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) |
| 1594 | { |
| 1595 | struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(bo: stream->oa_buffer.bo), }; |
| 1596 | void __user *uaddr = (void __user *)arg; |
| 1597 | |
| 1598 | if (copy_to_user(to: uaddr, from: &info, n: sizeof(info))) |
| 1599 | return -EFAULT; |
| 1600 | |
| 1601 | return 0; |
| 1602 | } |
| 1603 | |
| 1604 | static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, |
| 1605 | unsigned int cmd, |
| 1606 | unsigned long arg) |
| 1607 | { |
| 1608 | switch (cmd) { |
| 1609 | case DRM_XE_OBSERVATION_IOCTL_ENABLE: |
| 1610 | return xe_oa_enable_locked(stream); |
| 1611 | case DRM_XE_OBSERVATION_IOCTL_DISABLE: |
| 1612 | return xe_oa_disable_locked(stream); |
| 1613 | case DRM_XE_OBSERVATION_IOCTL_CONFIG: |
| 1614 | return xe_oa_config_locked(stream, arg); |
| 1615 | case DRM_XE_OBSERVATION_IOCTL_STATUS: |
| 1616 | return xe_oa_status_locked(stream, arg); |
| 1617 | case DRM_XE_OBSERVATION_IOCTL_INFO: |
| 1618 | return xe_oa_info_locked(stream, arg); |
| 1619 | } |
| 1620 | |
| 1621 | return -EINVAL; |
| 1622 | } |
| 1623 | |
| 1624 | static long xe_oa_ioctl(struct file *file, |
| 1625 | unsigned int cmd, |
| 1626 | unsigned long arg) |
| 1627 | { |
| 1628 | struct xe_oa_stream *stream = file->private_data; |
| 1629 | long ret; |
| 1630 | |
| 1631 | mutex_lock(&stream->stream_lock); |
| 1632 | ret = xe_oa_ioctl_locked(stream, cmd, arg); |
| 1633 | mutex_unlock(lock: &stream->stream_lock); |
| 1634 | |
| 1635 | return ret; |
| 1636 | } |
| 1637 | |
| 1638 | static void xe_oa_destroy_locked(struct xe_oa_stream *stream) |
| 1639 | { |
| 1640 | if (stream->enabled) |
| 1641 | xe_oa_disable_locked(stream); |
| 1642 | |
| 1643 | xe_oa_stream_destroy(stream); |
| 1644 | |
| 1645 | if (stream->exec_q) |
| 1646 | xe_exec_queue_put(q: stream->exec_q); |
| 1647 | |
| 1648 | drm_syncobj_put(obj: stream->ufence_syncobj); |
| 1649 | kfree(objp: stream); |
| 1650 | } |
| 1651 | |
| 1652 | static int xe_oa_release(struct inode *inode, struct file *file) |
| 1653 | { |
| 1654 | struct xe_oa_stream *stream = file->private_data; |
| 1655 | struct xe_gt *gt = stream->gt; |
| 1656 | |
| 1657 | xe_pm_runtime_get(gt_to_xe(gt)); |
| 1658 | mutex_lock(>->oa.gt_lock); |
| 1659 | xe_oa_destroy_locked(stream); |
| 1660 | mutex_unlock(lock: >->oa.gt_lock); |
| 1661 | xe_pm_runtime_put(gt_to_xe(gt)); |
| 1662 | |
| 1663 | /* Release the reference the OA stream kept on the driver */ |
| 1664 | drm_dev_put(dev: >_to_xe(gt)->drm); |
| 1665 | |
| 1666 | return 0; |
| 1667 | } |
| 1668 | |
| 1669 | static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma) |
| 1670 | { |
| 1671 | struct xe_oa_stream *stream = file->private_data; |
| 1672 | struct xe_bo *bo = stream->oa_buffer.bo; |
| 1673 | unsigned long start = vma->vm_start; |
| 1674 | int i, ret; |
| 1675 | |
| 1676 | if (xe_observation_paranoid && !perfmon_capable()) { |
| 1677 | drm_dbg(&stream->oa->xe->drm, "Insufficient privilege to map OA buffer\n" ); |
| 1678 | return -EACCES; |
| 1679 | } |
| 1680 | |
| 1681 | /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ |
| 1682 | if (vma->vm_end - vma->vm_start != xe_bo_size(bo: stream->oa_buffer.bo)) { |
| 1683 | drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n" ); |
| 1684 | return -EINVAL; |
| 1685 | } |
| 1686 | |
| 1687 | /* |
| 1688 | * Only support VM_READ, enforce MAP_PRIVATE by checking for |
| 1689 | * VM_MAYSHARE, don't copy the vma on fork |
| 1690 | */ |
| 1691 | if (vma->vm_flags & (VM_WRITE | VM_EXEC | VM_SHARED | VM_MAYSHARE)) { |
| 1692 | drm_dbg(&stream->oa->xe->drm, "mmap must be read only\n" ); |
| 1693 | return -EINVAL; |
| 1694 | } |
| 1695 | vm_flags_mod(vma, VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY, |
| 1696 | VM_MAYWRITE | VM_MAYEXEC); |
| 1697 | |
| 1698 | xe_assert(stream->oa->xe, bo->ttm.ttm->num_pages == vma_pages(vma)); |
| 1699 | for (i = 0; i < bo->ttm.ttm->num_pages; i++) { |
| 1700 | ret = remap_pfn_range(vma, addr: start, page_to_pfn(bo->ttm.ttm->pages[i]), |
| 1701 | PAGE_SIZE, pgprot: vma->vm_page_prot); |
| 1702 | if (ret) |
| 1703 | break; |
| 1704 | |
| 1705 | start += PAGE_SIZE; |
| 1706 | } |
| 1707 | |
| 1708 | return ret; |
| 1709 | } |
| 1710 | |
| 1711 | static const struct file_operations xe_oa_fops = { |
| 1712 | .owner = THIS_MODULE, |
| 1713 | .release = xe_oa_release, |
| 1714 | .poll = xe_oa_poll, |
| 1715 | .read = xe_oa_read, |
| 1716 | .unlocked_ioctl = xe_oa_ioctl, |
| 1717 | .mmap = xe_oa_mmap, |
| 1718 | }; |
| 1719 | |
| 1720 | static int xe_oa_stream_init(struct xe_oa_stream *stream, |
| 1721 | struct xe_oa_open_param *param) |
| 1722 | { |
| 1723 | struct xe_gt *gt = param->hwe->gt; |
| 1724 | int ret; |
| 1725 | |
| 1726 | stream->exec_q = param->exec_q; |
| 1727 | stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS; |
| 1728 | stream->oa_unit = param->oa_unit; |
| 1729 | stream->hwe = param->hwe; |
| 1730 | stream->gt = stream->hwe->gt; |
| 1731 | stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format]; |
| 1732 | |
| 1733 | stream->sample = param->sample; |
| 1734 | stream->periodic = param->period_exponent >= 0; |
| 1735 | stream->period_exponent = param->period_exponent; |
| 1736 | stream->no_preempt = param->no_preempt; |
| 1737 | stream->wait_num_reports = param->wait_num_reports; |
| 1738 | |
| 1739 | stream->xef = xe_file_get(xef: param->xef); |
| 1740 | stream->num_syncs = param->num_syncs; |
| 1741 | stream->syncs = param->syncs; |
| 1742 | |
| 1743 | /* |
| 1744 | * For Xe2+, when overrun mode is enabled, there are no partial reports at the end |
| 1745 | * of buffer, making the OA buffer effectively a non-power-of-2 size circular |
| 1746 | * buffer whose size, circ_size, is a multiple of the report size |
| 1747 | */ |
| 1748 | if (GRAPHICS_VER(stream->oa->xe) >= 20 && |
| 1749 | stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) |
| 1750 | stream->oa_buffer.circ_size = |
| 1751 | param->oa_buffer_size - |
| 1752 | param->oa_buffer_size % stream->oa_buffer.format->size; |
| 1753 | else |
| 1754 | stream->oa_buffer.circ_size = param->oa_buffer_size; |
| 1755 | |
| 1756 | stream->oa_config = xe_oa_get_oa_config(oa: stream->oa, metrics_set: param->metric_set); |
| 1757 | if (!stream->oa_config) { |
| 1758 | drm_dbg(&stream->oa->xe->drm, "Invalid OA config id=%i\n" , param->metric_set); |
| 1759 | ret = -EINVAL; |
| 1760 | goto exit; |
| 1761 | } |
| 1762 | |
| 1763 | /* |
| 1764 | * GuC reset of engines causes OA to lose configuration |
| 1765 | * state. Prevent this by overriding GUCRC mode. |
| 1766 | */ |
| 1767 | if (XE_GT_WA(stream->gt, 1509372804)) { |
| 1768 | ret = xe_guc_pc_override_gucrc_mode(pc: >->uc.guc.pc, |
| 1769 | mode: SLPC_GUCRC_MODE_GUCRC_NO_RC6); |
| 1770 | if (ret) |
| 1771 | goto err_free_configs; |
| 1772 | |
| 1773 | stream->override_gucrc = true; |
| 1774 | } |
| 1775 | |
| 1776 | /* Take runtime pm ref and forcewake to disable RC6 */ |
| 1777 | xe_pm_runtime_get(xe: stream->oa->xe); |
| 1778 | stream->fw_ref = xe_force_wake_get(fw: gt_to_fw(gt), domains: XE_FORCEWAKE_ALL); |
| 1779 | if (!xe_force_wake_ref_has_domain(fw_ref: stream->fw_ref, domain: XE_FORCEWAKE_ALL)) { |
| 1780 | ret = -ETIMEDOUT; |
| 1781 | goto err_fw_put; |
| 1782 | } |
| 1783 | |
| 1784 | ret = xe_oa_alloc_oa_buffer(stream, size: param->oa_buffer_size); |
| 1785 | if (ret) |
| 1786 | goto err_fw_put; |
| 1787 | |
| 1788 | stream->k_exec_q = xe_exec_queue_create(xe: stream->oa->xe, NULL, |
| 1789 | BIT(stream->hwe->logical_instance), width: 1, |
| 1790 | hw_engine: stream->hwe, EXEC_QUEUE_FLAG_KERNEL, extensions: 0); |
| 1791 | if (IS_ERR(ptr: stream->k_exec_q)) { |
| 1792 | ret = PTR_ERR(ptr: stream->k_exec_q); |
| 1793 | drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d" , |
| 1794 | stream->gt->info.id, stream->hwe->name, ret); |
| 1795 | goto err_free_oa_buf; |
| 1796 | } |
| 1797 | |
| 1798 | ret = xe_oa_enable_metric_set(stream); |
| 1799 | if (ret) { |
| 1800 | drm_dbg(&stream->oa->xe->drm, "Unable to enable metric set\n" ); |
| 1801 | goto err_put_k_exec_q; |
| 1802 | } |
| 1803 | |
| 1804 | drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n" , |
| 1805 | stream->oa_config->uuid); |
| 1806 | |
| 1807 | WRITE_ONCE(stream->oa_unit->exclusive_stream, stream); |
| 1808 | |
| 1809 | hrtimer_setup(timer: &stream->poll_check_timer, function: xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC, |
| 1810 | mode: HRTIMER_MODE_REL); |
| 1811 | init_waitqueue_head(&stream->poll_wq); |
| 1812 | |
| 1813 | spin_lock_init(&stream->oa_buffer.ptr_lock); |
| 1814 | mutex_init(&stream->stream_lock); |
| 1815 | |
| 1816 | return 0; |
| 1817 | |
| 1818 | err_put_k_exec_q: |
| 1819 | xe_oa_disable_metric_set(stream); |
| 1820 | xe_exec_queue_put(q: stream->k_exec_q); |
| 1821 | err_free_oa_buf: |
| 1822 | xe_oa_free_oa_buffer(stream); |
| 1823 | err_fw_put: |
| 1824 | xe_force_wake_put(fw: gt_to_fw(gt), fw_ref: stream->fw_ref); |
| 1825 | xe_pm_runtime_put(xe: stream->oa->xe); |
| 1826 | if (stream->override_gucrc) |
| 1827 | xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(>->uc.guc.pc)); |
| 1828 | err_free_configs: |
| 1829 | xe_oa_free_configs(stream); |
| 1830 | exit: |
| 1831 | xe_file_put(xef: stream->xef); |
| 1832 | return ret; |
| 1833 | } |
| 1834 | |
| 1835 | static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa, |
| 1836 | struct xe_oa_open_param *param) |
| 1837 | { |
| 1838 | struct xe_oa_stream *stream; |
| 1839 | struct drm_syncobj *ufence_syncobj; |
| 1840 | int stream_fd; |
| 1841 | int ret; |
| 1842 | |
| 1843 | /* We currently only allow exclusive access */ |
| 1844 | if (param->oa_unit->exclusive_stream) { |
| 1845 | drm_dbg(&oa->xe->drm, "OA unit already in use\n" ); |
| 1846 | ret = -EBUSY; |
| 1847 | goto exit; |
| 1848 | } |
| 1849 | |
| 1850 | ret = drm_syncobj_create(out_syncobj: &ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, |
| 1851 | NULL); |
| 1852 | if (ret) |
| 1853 | goto exit; |
| 1854 | |
| 1855 | stream = kzalloc(sizeof(*stream), GFP_KERNEL); |
| 1856 | if (!stream) { |
| 1857 | ret = -ENOMEM; |
| 1858 | goto err_syncobj; |
| 1859 | } |
| 1860 | stream->ufence_syncobj = ufence_syncobj; |
| 1861 | stream->oa = oa; |
| 1862 | |
| 1863 | ret = xe_oa_parse_syncs(oa, stream, param); |
| 1864 | if (ret) |
| 1865 | goto err_free; |
| 1866 | |
| 1867 | ret = xe_oa_stream_init(stream, param); |
| 1868 | if (ret) { |
| 1869 | while (param->num_syncs--) |
| 1870 | xe_sync_entry_cleanup(sync: ¶m->syncs[param->num_syncs]); |
| 1871 | kfree(objp: param->syncs); |
| 1872 | goto err_free; |
| 1873 | } |
| 1874 | |
| 1875 | if (!param->disabled) { |
| 1876 | ret = xe_oa_enable_locked(stream); |
| 1877 | if (ret) |
| 1878 | goto err_destroy; |
| 1879 | } |
| 1880 | |
| 1881 | stream_fd = anon_inode_getfd(name: "[xe_oa]" , fops: &xe_oa_fops, priv: stream, flags: 0); |
| 1882 | if (stream_fd < 0) { |
| 1883 | ret = stream_fd; |
| 1884 | goto err_disable; |
| 1885 | } |
| 1886 | |
| 1887 | /* Hold a reference on the drm device till stream_fd is released */ |
| 1888 | drm_dev_get(dev: &stream->oa->xe->drm); |
| 1889 | |
| 1890 | return stream_fd; |
| 1891 | err_disable: |
| 1892 | if (!param->disabled) |
| 1893 | xe_oa_disable_locked(stream); |
| 1894 | err_destroy: |
| 1895 | xe_oa_stream_destroy(stream); |
| 1896 | err_free: |
| 1897 | kfree(objp: stream); |
| 1898 | err_syncobj: |
| 1899 | drm_syncobj_put(obj: ufence_syncobj); |
| 1900 | exit: |
| 1901 | return ret; |
| 1902 | } |
| 1903 | |
| 1904 | /** |
| 1905 | * xe_oa_timestamp_frequency - Return OA timestamp frequency |
| 1906 | * @gt: @xe_gt |
| 1907 | * |
| 1908 | * OA timestamp frequency = CS timestamp frequency in most platforms. On some |
| 1909 | * platforms OA unit ignores the CTC_SHIFT and the 2 timestamps differ. In such |
| 1910 | * cases, return the adjusted CS timestamp frequency to the user. |
| 1911 | */ |
| 1912 | u32 xe_oa_timestamp_frequency(struct xe_gt *gt) |
| 1913 | { |
| 1914 | u32 reg, shift; |
| 1915 | |
| 1916 | if (XE_GT_WA(gt, 18013179988) || XE_GT_WA(gt, 14015568240)) { |
| 1917 | xe_pm_runtime_get(gt_to_xe(gt)); |
| 1918 | reg = xe_mmio_read32(mmio: >->mmio, RPM_CONFIG0); |
| 1919 | xe_pm_runtime_put(gt_to_xe(gt)); |
| 1920 | |
| 1921 | shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg); |
| 1922 | return gt->info.reference_clock << (3 - shift); |
| 1923 | } else { |
| 1924 | return gt->info.reference_clock; |
| 1925 | } |
| 1926 | } |
| 1927 | |
| 1928 | static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent) |
| 1929 | { |
| 1930 | u64 nom = (2ULL << exponent) * NSEC_PER_SEC; |
| 1931 | u32 den = xe_oa_timestamp_frequency(gt); |
| 1932 | |
| 1933 | return div_u64(dividend: nom + den - 1, divisor: den); |
| 1934 | } |
| 1935 | |
| 1936 | static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type) |
| 1937 | { |
| 1938 | switch (param->oa_unit->type) { |
| 1939 | case DRM_XE_OA_UNIT_TYPE_OAG: |
| 1940 | return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR || |
| 1941 | type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC; |
| 1942 | case DRM_XE_OA_UNIT_TYPE_OAM: |
| 1943 | case DRM_XE_OA_UNIT_TYPE_OAM_SAG: |
| 1944 | return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC; |
| 1945 | default: |
| 1946 | return false; |
| 1947 | } |
| 1948 | } |
| 1949 | |
| 1950 | /** |
| 1951 | * xe_oa_unit_id - Return OA unit ID for a hardware engine |
| 1952 | * @hwe: @xe_hw_engine |
| 1953 | * |
| 1954 | * Return OA unit ID for a hardware engine when available |
| 1955 | */ |
| 1956 | u16 xe_oa_unit_id(struct xe_hw_engine *hwe) |
| 1957 | { |
| 1958 | return hwe->oa_unit && hwe->oa_unit->num_engines ? |
| 1959 | hwe->oa_unit->oa_unit_id : U16_MAX; |
| 1960 | } |
| 1961 | |
| 1962 | /* A hwe must be assigned to stream/oa_unit for batch submissions */ |
| 1963 | static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param) |
| 1964 | { |
| 1965 | struct xe_hw_engine *hwe; |
| 1966 | enum xe_hw_engine_id id; |
| 1967 | int ret = 0; |
| 1968 | |
| 1969 | /* If not provided, OA unit defaults to OA unit 0 as per uapi */ |
| 1970 | if (!param->oa_unit) |
| 1971 | param->oa_unit = &xe_root_mmio_gt(xe: oa->xe)->oa.oa_unit[0]; |
| 1972 | |
| 1973 | /* When we have an exec_q, get hwe from the exec_q */ |
| 1974 | if (param->exec_q) { |
| 1975 | param->hwe = xe_gt_hw_engine(gt: param->exec_q->gt, class: param->exec_q->class, |
| 1976 | instance: param->engine_instance, logical: true); |
| 1977 | if (!param->hwe || param->hwe->oa_unit != param->oa_unit) |
| 1978 | goto err; |
| 1979 | goto out; |
| 1980 | } |
| 1981 | |
| 1982 | /* Else just get the first hwe attached to the oa unit */ |
| 1983 | for_each_hw_engine(hwe, param->oa_unit->gt, id) { |
| 1984 | if (hwe->oa_unit == param->oa_unit) { |
| 1985 | param->hwe = hwe; |
| 1986 | goto out; |
| 1987 | } |
| 1988 | } |
| 1989 | |
| 1990 | /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */ |
| 1991 | for_each_hw_engine(hwe, param->oa_unit->gt, id) { |
| 1992 | if (!hwe->oa_unit) |
| 1993 | continue; |
| 1994 | |
| 1995 | param->hwe = hwe; |
| 1996 | goto out; |
| 1997 | } |
| 1998 | err: |
| 1999 | drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n" , |
| 2000 | param->exec_q ? param->exec_q->class : -1, |
| 2001 | param->engine_instance, param->oa_unit->oa_unit_id); |
| 2002 | ret = -EINVAL; |
| 2003 | out: |
| 2004 | return ret; |
| 2005 | } |
| 2006 | |
| 2007 | /** |
| 2008 | * xe_oa_stream_open_ioctl - Opens an OA stream |
| 2009 | * @dev: @drm_device |
| 2010 | * @data: pointer to struct @drm_xe_oa_config |
| 2011 | * @file: @drm_file |
| 2012 | * |
| 2013 | * The functions opens an OA stream. An OA stream, opened with specified |
| 2014 | * properties, enables OA counter samples to be collected, either |
| 2015 | * periodically (time based sampling), or on request (using OA queries) |
| 2016 | */ |
| 2017 | int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) |
| 2018 | { |
| 2019 | struct xe_device *xe = to_xe_device(dev); |
| 2020 | struct xe_oa *oa = &xe->oa; |
| 2021 | struct xe_file *xef = to_xe_file(file); |
| 2022 | struct xe_oa_open_param param = {}; |
| 2023 | const struct xe_oa_format *f; |
| 2024 | bool privileged_op = true; |
| 2025 | int ret; |
| 2026 | |
| 2027 | if (!oa->xe) { |
| 2028 | drm_dbg(&xe->drm, "xe oa interface not available for this system\n" ); |
| 2029 | return -ENODEV; |
| 2030 | } |
| 2031 | |
| 2032 | param.xef = xef; |
| 2033 | param.period_exponent = -1; |
| 2034 | ret = xe_oa_user_extensions(oa, from: XE_OA_USER_EXTN_FROM_OPEN, extension: data, ext_number: 0, param: ¶m); |
| 2035 | if (ret) |
| 2036 | return ret; |
| 2037 | |
| 2038 | if (param.exec_queue_id > 0) { |
| 2039 | param.exec_q = xe_exec_queue_lookup(xef, id: param.exec_queue_id); |
| 2040 | if (XE_IOCTL_DBG(oa->xe, !param.exec_q)) |
| 2041 | return -ENOENT; |
| 2042 | |
| 2043 | if (XE_IOCTL_DBG(oa->xe, param.exec_q->width > 1)) |
| 2044 | return -EOPNOTSUPP; |
| 2045 | } |
| 2046 | |
| 2047 | /* |
| 2048 | * Query based sampling (using MI_REPORT_PERF_COUNT) with OAR/OAC, |
| 2049 | * without global stream access, can be an unprivileged operation |
| 2050 | */ |
| 2051 | if (param.exec_q && !param.sample) |
| 2052 | privileged_op = false; |
| 2053 | |
| 2054 | if (param.no_preempt) { |
| 2055 | if (!param.exec_q) { |
| 2056 | drm_dbg(&oa->xe->drm, "Preemption disable without exec_q!\n" ); |
| 2057 | ret = -EINVAL; |
| 2058 | goto err_exec_q; |
| 2059 | } |
| 2060 | privileged_op = true; |
| 2061 | } |
| 2062 | |
| 2063 | if (privileged_op && xe_observation_paranoid && !perfmon_capable()) { |
| 2064 | drm_dbg(&oa->xe->drm, "Insufficient privileges to open xe OA stream\n" ); |
| 2065 | ret = -EACCES; |
| 2066 | goto err_exec_q; |
| 2067 | } |
| 2068 | |
| 2069 | if (!param.exec_q && !param.sample) { |
| 2070 | drm_dbg(&oa->xe->drm, "Only OA report sampling supported\n" ); |
| 2071 | ret = -EINVAL; |
| 2072 | goto err_exec_q; |
| 2073 | } |
| 2074 | |
| 2075 | ret = xe_oa_assign_hwe(oa, param: ¶m); |
| 2076 | if (ret) |
| 2077 | goto err_exec_q; |
| 2078 | |
| 2079 | f = &oa->oa_formats[param.oa_format]; |
| 2080 | if (!param.oa_format || !f->size || |
| 2081 | !oa_unit_supports_oa_format(param: ¶m, type: f->type)) { |
| 2082 | drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n" , |
| 2083 | param.oa_format, f->type, f->size, param.hwe->class); |
| 2084 | ret = -EINVAL; |
| 2085 | goto err_exec_q; |
| 2086 | } |
| 2087 | |
| 2088 | if (param.period_exponent >= 0) { |
| 2089 | u64 oa_period, oa_freq_hz; |
| 2090 | |
| 2091 | /* Requesting samples from OAG buffer is a privileged operation */ |
| 2092 | if (!param.sample) { |
| 2093 | drm_dbg(&oa->xe->drm, "OA_EXPONENT specified without SAMPLE_OA\n" ); |
| 2094 | ret = -EINVAL; |
| 2095 | goto err_exec_q; |
| 2096 | } |
| 2097 | oa_period = oa_exponent_to_ns(gt: param.hwe->gt, exponent: param.period_exponent); |
| 2098 | oa_freq_hz = div64_u64(NSEC_PER_SEC, divisor: oa_period); |
| 2099 | drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n" , oa_freq_hz); |
| 2100 | } |
| 2101 | |
| 2102 | if (!param.oa_buffer_size) |
| 2103 | param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; |
| 2104 | |
| 2105 | if (!param.wait_num_reports) |
| 2106 | param.wait_num_reports = 1; |
| 2107 | if (param.wait_num_reports > param.oa_buffer_size / f->size) { |
| 2108 | drm_dbg(&oa->xe->drm, "wait_num_reports %d\n" , param.wait_num_reports); |
| 2109 | ret = -EINVAL; |
| 2110 | goto err_exec_q; |
| 2111 | } |
| 2112 | |
| 2113 | mutex_lock(¶m.hwe->gt->oa.gt_lock); |
| 2114 | ret = xe_oa_stream_open_ioctl_locked(oa, param: ¶m); |
| 2115 | mutex_unlock(lock: ¶m.hwe->gt->oa.gt_lock); |
| 2116 | if (ret < 0) |
| 2117 | goto err_exec_q; |
| 2118 | |
| 2119 | return ret; |
| 2120 | |
| 2121 | err_exec_q: |
| 2122 | if (param.exec_q) |
| 2123 | xe_exec_queue_put(q: param.exec_q); |
| 2124 | return ret; |
| 2125 | } |
| 2126 | |
| 2127 | static bool xe_oa_is_valid_flex_addr(struct xe_oa *oa, u32 addr) |
| 2128 | { |
| 2129 | static const struct xe_reg flex_eu_regs[] = { |
| 2130 | EU_PERF_CNTL0, |
| 2131 | EU_PERF_CNTL1, |
| 2132 | EU_PERF_CNTL2, |
| 2133 | EU_PERF_CNTL3, |
| 2134 | EU_PERF_CNTL4, |
| 2135 | EU_PERF_CNTL5, |
| 2136 | EU_PERF_CNTL6, |
| 2137 | }; |
| 2138 | int i; |
| 2139 | |
| 2140 | for (i = 0; i < ARRAY_SIZE(flex_eu_regs); i++) { |
| 2141 | if (flex_eu_regs[i].addr == addr) |
| 2142 | return true; |
| 2143 | } |
| 2144 | return false; |
| 2145 | } |
| 2146 | |
| 2147 | static bool xe_oa_reg_in_range_table(u32 addr, const struct xe_mmio_range *table) |
| 2148 | { |
| 2149 | while (table->start && table->end) { |
| 2150 | if (addr >= table->start && addr <= table->end) |
| 2151 | return true; |
| 2152 | |
| 2153 | table++; |
| 2154 | } |
| 2155 | |
| 2156 | return false; |
| 2157 | } |
| 2158 | |
| 2159 | static const struct xe_mmio_range xehp_oa_b_counters[] = { |
| 2160 | { .start = 0xdc48, .end = 0xdc48 }, /* OAA_ENABLE_REG */ |
| 2161 | { .start = 0xdd00, .end = 0xdd48 }, /* OAG_LCE0_0 - OAA_LENABLE_REG */ |
| 2162 | {} |
| 2163 | }; |
| 2164 | |
| 2165 | static const struct xe_mmio_range gen12_oa_b_counters[] = { |
| 2166 | { .start = 0x2b2c, .end = 0x2b2c }, /* OAG_OA_PESS */ |
| 2167 | { .start = 0xd900, .end = 0xd91c }, /* OAG_OASTARTTRIG[1-8] */ |
| 2168 | { .start = 0xd920, .end = 0xd93c }, /* OAG_OAREPORTTRIG1[1-8] */ |
| 2169 | { .start = 0xd940, .end = 0xd97c }, /* OAG_CEC[0-7][0-1] */ |
| 2170 | { .start = 0xdc00, .end = 0xdc3c }, /* OAG_SCEC[0-7][0-1] */ |
| 2171 | { .start = 0xdc40, .end = 0xdc40 }, /* OAG_SPCTR_CNF */ |
| 2172 | { .start = 0xdc44, .end = 0xdc44 }, /* OAA_DBG_REG */ |
| 2173 | {} |
| 2174 | }; |
| 2175 | |
| 2176 | static const struct xe_mmio_range mtl_oam_b_counters[] = { |
| 2177 | { .start = 0x393000, .end = 0x39301c }, /* OAM_STARTTRIG1[1-8] */ |
| 2178 | { .start = 0x393020, .end = 0x39303c }, /* OAM_REPORTTRIG1[1-8] */ |
| 2179 | { .start = 0x393040, .end = 0x39307c }, /* OAM_CEC[0-7][0-1] */ |
| 2180 | { .start = 0x393200, .end = 0x39323C }, /* MPES[0-7] */ |
| 2181 | {} |
| 2182 | }; |
| 2183 | |
| 2184 | static const struct xe_mmio_range xe2_oa_b_counters[] = { |
| 2185 | { .start = 0x393200, .end = 0x39323C }, /* MPES_0_MPES_SAG - MPES_7_UPPER_MPES_SAG */ |
| 2186 | { .start = 0x394200, .end = 0x39423C }, /* MPES_0_MPES_SCMI0 - MPES_7_UPPER_MPES_SCMI0 */ |
| 2187 | { .start = 0x394A00, .end = 0x394A3C }, /* MPES_0_MPES_SCMI1 - MPES_7_UPPER_MPES_SCMI1 */ |
| 2188 | {}, |
| 2189 | }; |
| 2190 | |
| 2191 | static bool xe_oa_is_valid_b_counter_addr(struct xe_oa *oa, u32 addr) |
| 2192 | { |
| 2193 | return xe_oa_reg_in_range_table(addr, table: xehp_oa_b_counters) || |
| 2194 | xe_oa_reg_in_range_table(addr, table: gen12_oa_b_counters) || |
| 2195 | xe_oa_reg_in_range_table(addr, table: mtl_oam_b_counters) || |
| 2196 | (GRAPHICS_VER(oa->xe) >= 20 && |
| 2197 | xe_oa_reg_in_range_table(addr, table: xe2_oa_b_counters)); |
| 2198 | } |
| 2199 | |
| 2200 | static const struct xe_mmio_range mtl_oa_mux_regs[] = { |
| 2201 | { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ |
| 2202 | { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ |
| 2203 | { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ |
| 2204 | { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ |
| 2205 | { .start = 0x38d100, .end = 0x38d114}, /* VISACTL */ |
| 2206 | {} |
| 2207 | }; |
| 2208 | |
| 2209 | static const struct xe_mmio_range gen12_oa_mux_regs[] = { |
| 2210 | { .start = 0x0d00, .end = 0x0d04 }, /* RPM_CONFIG[0-1] */ |
| 2211 | { .start = 0x0d0c, .end = 0x0d2c }, /* NOA_CONFIG[0-8] */ |
| 2212 | { .start = 0x9840, .end = 0x9840 }, /* GDT_CHICKEN_BITS */ |
| 2213 | { .start = 0x9884, .end = 0x9888 }, /* NOA_WRITE */ |
| 2214 | { .start = 0x20cc, .end = 0x20cc }, /* WAIT_FOR_RC6_EXIT */ |
| 2215 | {} |
| 2216 | }; |
| 2217 | |
| 2218 | static const struct xe_mmio_range xe2_oa_mux_regs[] = { |
| 2219 | { .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */ |
| 2220 | { .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */ |
| 2221 | { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */ |
| 2222 | { .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */ |
| 2223 | { .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */ |
| 2224 | { .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */ |
| 2225 | { .start = 0xE590, .end = 0xE590 }, /* TDL_LSC_LAT_MEASURE_TDL_GFX */ |
| 2226 | { .start = 0x13000, .end = 0x137FC }, /* PES_0_PESL0 - PES_63_UPPER_PESL3 */ |
| 2227 | {}, |
| 2228 | }; |
| 2229 | |
| 2230 | static bool xe_oa_is_valid_mux_addr(struct xe_oa *oa, u32 addr) |
| 2231 | { |
| 2232 | if (GRAPHICS_VER(oa->xe) >= 20) |
| 2233 | return xe_oa_reg_in_range_table(addr, table: xe2_oa_mux_regs); |
| 2234 | else if (GRAPHICS_VERx100(oa->xe) >= 1270) |
| 2235 | return xe_oa_reg_in_range_table(addr, table: mtl_oa_mux_regs); |
| 2236 | else |
| 2237 | return xe_oa_reg_in_range_table(addr, table: gen12_oa_mux_regs); |
| 2238 | } |
| 2239 | |
| 2240 | static bool xe_oa_is_valid_config_reg_addr(struct xe_oa *oa, u32 addr) |
| 2241 | { |
| 2242 | return xe_oa_is_valid_flex_addr(oa, addr) || |
| 2243 | xe_oa_is_valid_b_counter_addr(oa, addr) || |
| 2244 | xe_oa_is_valid_mux_addr(oa, addr); |
| 2245 | } |
| 2246 | |
| 2247 | static struct xe_oa_reg * |
| 2248 | xe_oa_alloc_regs(struct xe_oa *oa, bool (*is_valid)(struct xe_oa *oa, u32 addr), |
| 2249 | u32 __user *regs, u32 n_regs) |
| 2250 | { |
| 2251 | struct xe_oa_reg *oa_regs; |
| 2252 | int err; |
| 2253 | u32 i; |
| 2254 | |
| 2255 | oa_regs = kmalloc_array(n_regs, sizeof(*oa_regs), GFP_KERNEL); |
| 2256 | if (!oa_regs) |
| 2257 | return ERR_PTR(error: -ENOMEM); |
| 2258 | |
| 2259 | for (i = 0; i < n_regs; i++) { |
| 2260 | u32 addr, value; |
| 2261 | |
| 2262 | err = get_user(addr, regs); |
| 2263 | if (err) |
| 2264 | goto addr_err; |
| 2265 | |
| 2266 | if (!is_valid(oa, addr)) { |
| 2267 | drm_dbg(&oa->xe->drm, "Invalid oa_reg address: %X\n" , addr); |
| 2268 | err = -EINVAL; |
| 2269 | goto addr_err; |
| 2270 | } |
| 2271 | |
| 2272 | err = get_user(value, regs + 1); |
| 2273 | if (err) |
| 2274 | goto addr_err; |
| 2275 | |
| 2276 | oa_regs[i].addr = XE_REG(addr); |
| 2277 | oa_regs[i].value = value; |
| 2278 | |
| 2279 | regs += 2; |
| 2280 | } |
| 2281 | |
| 2282 | return oa_regs; |
| 2283 | |
| 2284 | addr_err: |
| 2285 | kfree(objp: oa_regs); |
| 2286 | return ERR_PTR(error: err); |
| 2287 | } |
| 2288 | ALLOW_ERROR_INJECTION(xe_oa_alloc_regs, ERRNO); |
| 2289 | |
| 2290 | static ssize_t show_dynamic_id(struct kobject *kobj, |
| 2291 | struct kobj_attribute *attr, |
| 2292 | char *buf) |
| 2293 | { |
| 2294 | struct xe_oa_config *oa_config = |
| 2295 | container_of(attr, typeof(*oa_config), sysfs_metric_id); |
| 2296 | |
| 2297 | return sysfs_emit(buf, fmt: "%d\n" , oa_config->id); |
| 2298 | } |
| 2299 | |
| 2300 | static int create_dynamic_oa_sysfs_entry(struct xe_oa *oa, |
| 2301 | struct xe_oa_config *oa_config) |
| 2302 | { |
| 2303 | sysfs_attr_init(&oa_config->sysfs_metric_id.attr); |
| 2304 | oa_config->sysfs_metric_id.attr.name = "id" ; |
| 2305 | oa_config->sysfs_metric_id.attr.mode = 0444; |
| 2306 | oa_config->sysfs_metric_id.show = show_dynamic_id; |
| 2307 | oa_config->sysfs_metric_id.store = NULL; |
| 2308 | |
| 2309 | oa_config->attrs[0] = &oa_config->sysfs_metric_id.attr; |
| 2310 | oa_config->attrs[1] = NULL; |
| 2311 | |
| 2312 | oa_config->sysfs_metric.name = oa_config->uuid; |
| 2313 | oa_config->sysfs_metric.attrs = oa_config->attrs; |
| 2314 | |
| 2315 | return sysfs_create_group(kobj: oa->metrics_kobj, grp: &oa_config->sysfs_metric); |
| 2316 | } |
| 2317 | |
| 2318 | /** |
| 2319 | * xe_oa_add_config_ioctl - Adds one OA config |
| 2320 | * @dev: @drm_device |
| 2321 | * @data: pointer to struct @drm_xe_oa_config |
| 2322 | * @file: @drm_file |
| 2323 | * |
| 2324 | * The functions adds an OA config to the set of OA configs maintained in |
| 2325 | * the kernel. The config determines which OA metrics are collected for an |
| 2326 | * OA stream. |
| 2327 | */ |
| 2328 | int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) |
| 2329 | { |
| 2330 | struct xe_device *xe = to_xe_device(dev); |
| 2331 | struct xe_oa *oa = &xe->oa; |
| 2332 | struct drm_xe_oa_config param; |
| 2333 | struct drm_xe_oa_config *arg = ¶m; |
| 2334 | struct xe_oa_config *oa_config, *tmp; |
| 2335 | struct xe_oa_reg *regs; |
| 2336 | int err, id; |
| 2337 | |
| 2338 | if (!oa->xe) { |
| 2339 | drm_dbg(&xe->drm, "xe oa interface not available for this system\n" ); |
| 2340 | return -ENODEV; |
| 2341 | } |
| 2342 | |
| 2343 | if (xe_observation_paranoid && !perfmon_capable()) { |
| 2344 | drm_dbg(&oa->xe->drm, "Insufficient privileges to add xe OA config\n" ); |
| 2345 | return -EACCES; |
| 2346 | } |
| 2347 | |
| 2348 | err = copy_from_user(to: ¶m, u64_to_user_ptr(data), n: sizeof(param)); |
| 2349 | if (XE_IOCTL_DBG(oa->xe, err)) |
| 2350 | return -EFAULT; |
| 2351 | |
| 2352 | if (XE_IOCTL_DBG(oa->xe, arg->extensions) || |
| 2353 | XE_IOCTL_DBG(oa->xe, !arg->regs_ptr) || |
| 2354 | XE_IOCTL_DBG(oa->xe, !arg->n_regs)) |
| 2355 | return -EINVAL; |
| 2356 | |
| 2357 | oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); |
| 2358 | if (!oa_config) |
| 2359 | return -ENOMEM; |
| 2360 | |
| 2361 | oa_config->oa = oa; |
| 2362 | kref_init(kref: &oa_config->ref); |
| 2363 | |
| 2364 | if (!uuid_is_valid(uuid: arg->uuid)) { |
| 2365 | drm_dbg(&oa->xe->drm, "Invalid uuid format for OA config\n" ); |
| 2366 | err = -EINVAL; |
| 2367 | goto reg_err; |
| 2368 | } |
| 2369 | |
| 2370 | /* Last character in oa_config->uuid will be 0 because oa_config is kzalloc */ |
| 2371 | memcpy(oa_config->uuid, arg->uuid, sizeof(arg->uuid)); |
| 2372 | |
| 2373 | oa_config->regs_len = arg->n_regs; |
| 2374 | regs = xe_oa_alloc_regs(oa, is_valid: xe_oa_is_valid_config_reg_addr, |
| 2375 | u64_to_user_ptr(arg->regs_ptr), |
| 2376 | n_regs: arg->n_regs); |
| 2377 | if (IS_ERR(ptr: regs)) { |
| 2378 | drm_dbg(&oa->xe->drm, "Failed to create OA config for mux_regs\n" ); |
| 2379 | err = PTR_ERR(ptr: regs); |
| 2380 | goto reg_err; |
| 2381 | } |
| 2382 | oa_config->regs = regs; |
| 2383 | |
| 2384 | err = mutex_lock_interruptible(&oa->metrics_lock); |
| 2385 | if (err) |
| 2386 | goto reg_err; |
| 2387 | |
| 2388 | /* We shouldn't have too many configs, so this iteration shouldn't be too costly */ |
| 2389 | idr_for_each_entry(&oa->metrics_idr, tmp, id) { |
| 2390 | if (!strcmp(tmp->uuid, oa_config->uuid)) { |
| 2391 | drm_dbg(&oa->xe->drm, "OA config already exists with this uuid\n" ); |
| 2392 | err = -EADDRINUSE; |
| 2393 | goto sysfs_err; |
| 2394 | } |
| 2395 | } |
| 2396 | |
| 2397 | err = create_dynamic_oa_sysfs_entry(oa, oa_config); |
| 2398 | if (err) { |
| 2399 | drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n" ); |
| 2400 | goto sysfs_err; |
| 2401 | } |
| 2402 | |
| 2403 | oa_config->id = idr_alloc(&oa->metrics_idr, ptr: oa_config, start: 1, end: 0, GFP_KERNEL); |
| 2404 | if (oa_config->id < 0) { |
| 2405 | drm_dbg(&oa->xe->drm, "Failed to create sysfs entry for OA config\n" ); |
| 2406 | err = oa_config->id; |
| 2407 | goto sysfs_err; |
| 2408 | } |
| 2409 | |
| 2410 | id = oa_config->id; |
| 2411 | |
| 2412 | drm_dbg(&oa->xe->drm, "Added config %s id=%i\n" , oa_config->uuid, id); |
| 2413 | |
| 2414 | mutex_unlock(lock: &oa->metrics_lock); |
| 2415 | |
| 2416 | return id; |
| 2417 | |
| 2418 | sysfs_err: |
| 2419 | mutex_unlock(lock: &oa->metrics_lock); |
| 2420 | reg_err: |
| 2421 | xe_oa_config_put(oa_config); |
| 2422 | drm_dbg(&oa->xe->drm, "Failed to add new OA config\n" ); |
| 2423 | return err; |
| 2424 | } |
| 2425 | |
| 2426 | /** |
| 2427 | * xe_oa_remove_config_ioctl - Removes one OA config |
| 2428 | * @dev: @drm_device |
| 2429 | * @data: pointer to struct @drm_xe_observation_param |
| 2430 | * @file: @drm_file |
| 2431 | */ |
| 2432 | int xe_oa_remove_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *file) |
| 2433 | { |
| 2434 | struct xe_device *xe = to_xe_device(dev); |
| 2435 | struct xe_oa *oa = &xe->oa; |
| 2436 | struct xe_oa_config *oa_config; |
| 2437 | u64 arg, *ptr = u64_to_user_ptr(data); |
| 2438 | int ret; |
| 2439 | |
| 2440 | if (!oa->xe) { |
| 2441 | drm_dbg(&xe->drm, "xe oa interface not available for this system\n" ); |
| 2442 | return -ENODEV; |
| 2443 | } |
| 2444 | |
| 2445 | if (xe_observation_paranoid && !perfmon_capable()) { |
| 2446 | drm_dbg(&oa->xe->drm, "Insufficient privileges to remove xe OA config\n" ); |
| 2447 | return -EACCES; |
| 2448 | } |
| 2449 | |
| 2450 | ret = get_user(arg, ptr); |
| 2451 | if (XE_IOCTL_DBG(oa->xe, ret)) |
| 2452 | return ret; |
| 2453 | |
| 2454 | ret = mutex_lock_interruptible(&oa->metrics_lock); |
| 2455 | if (ret) |
| 2456 | return ret; |
| 2457 | |
| 2458 | oa_config = idr_find(&oa->metrics_idr, id: arg); |
| 2459 | if (!oa_config) { |
| 2460 | drm_dbg(&oa->xe->drm, "Failed to remove unknown OA config\n" ); |
| 2461 | ret = -ENOENT; |
| 2462 | goto err_unlock; |
| 2463 | } |
| 2464 | |
| 2465 | WARN_ON(arg != oa_config->id); |
| 2466 | |
| 2467 | sysfs_remove_group(kobj: oa->metrics_kobj, grp: &oa_config->sysfs_metric); |
| 2468 | idr_remove(&oa->metrics_idr, id: arg); |
| 2469 | |
| 2470 | mutex_unlock(lock: &oa->metrics_lock); |
| 2471 | |
| 2472 | drm_dbg(&oa->xe->drm, "Removed config %s id=%i\n" , oa_config->uuid, oa_config->id); |
| 2473 | |
| 2474 | xe_oa_config_put(oa_config); |
| 2475 | |
| 2476 | return 0; |
| 2477 | |
| 2478 | err_unlock: |
| 2479 | mutex_unlock(lock: &oa->metrics_lock); |
| 2480 | return ret; |
| 2481 | } |
| 2482 | |
| 2483 | static void xe_oa_unregister(void *arg) |
| 2484 | { |
| 2485 | struct xe_oa *oa = arg; |
| 2486 | |
| 2487 | if (!oa->metrics_kobj) |
| 2488 | return; |
| 2489 | |
| 2490 | kobject_put(kobj: oa->metrics_kobj); |
| 2491 | oa->metrics_kobj = NULL; |
| 2492 | } |
| 2493 | |
| 2494 | /** |
| 2495 | * xe_oa_register - Xe OA registration |
| 2496 | * @xe: @xe_device |
| 2497 | * |
| 2498 | * Exposes the metrics sysfs directory upon completion of module initialization |
| 2499 | */ |
| 2500 | int xe_oa_register(struct xe_device *xe) |
| 2501 | { |
| 2502 | struct xe_oa *oa = &xe->oa; |
| 2503 | |
| 2504 | if (!oa->xe) |
| 2505 | return 0; |
| 2506 | |
| 2507 | oa->metrics_kobj = kobject_create_and_add(name: "metrics" , |
| 2508 | parent: &xe->drm.primary->kdev->kobj); |
| 2509 | if (!oa->metrics_kobj) |
| 2510 | return -ENOMEM; |
| 2511 | |
| 2512 | return devm_add_action_or_reset(xe->drm.dev, xe_oa_unregister, oa); |
| 2513 | } |
| 2514 | |
| 2515 | static u32 num_oa_units_per_gt(struct xe_gt *gt) |
| 2516 | { |
| 2517 | if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20) |
| 2518 | return 1; |
| 2519 | else if (!IS_DGFX(gt_to_xe(gt))) |
| 2520 | return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */ |
| 2521 | else |
| 2522 | return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */ |
| 2523 | } |
| 2524 | |
| 2525 | static u32 __hwe_oam_unit(struct xe_hw_engine *hwe) |
| 2526 | { |
| 2527 | if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270) |
| 2528 | return XE_OA_UNIT_INVALID; |
| 2529 | |
| 2530 | xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt)); |
| 2531 | |
| 2532 | if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20) |
| 2533 | return 0; |
| 2534 | /* |
| 2535 | * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also |
| 2536 | * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't |
| 2537 | * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA |
| 2538 | * unit without attached engines. Fused off engines can also result in oa_unit's with |
| 2539 | * num_engines == 0. OA streams can be opened on all OA units. |
| 2540 | */ |
| 2541 | else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0) |
| 2542 | return XE_OA_UNIT_INVALID; |
| 2543 | else if (!IS_DGFX(gt_to_xe(hwe->gt))) |
| 2544 | return XE_OAM_UNIT_SCMI_0; |
| 2545 | else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE) |
| 2546 | return (hwe->instance / 2 & 0x1) + 1; |
| 2547 | else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE) |
| 2548 | return (hwe->instance & 0x1) + 1; |
| 2549 | |
| 2550 | return XE_OA_UNIT_INVALID; |
| 2551 | } |
| 2552 | |
| 2553 | static u32 __hwe_oa_unit(struct xe_hw_engine *hwe) |
| 2554 | { |
| 2555 | switch (hwe->class) { |
| 2556 | case XE_ENGINE_CLASS_RENDER: |
| 2557 | case XE_ENGINE_CLASS_COMPUTE: |
| 2558 | return 0; |
| 2559 | |
| 2560 | case XE_ENGINE_CLASS_VIDEO_DECODE: |
| 2561 | case XE_ENGINE_CLASS_VIDEO_ENHANCE: |
| 2562 | case XE_ENGINE_CLASS_OTHER: |
| 2563 | return __hwe_oam_unit(hwe); |
| 2564 | |
| 2565 | default: |
| 2566 | return XE_OA_UNIT_INVALID; |
| 2567 | } |
| 2568 | } |
| 2569 | |
| 2570 | static struct xe_oa_regs __oam_regs(u32 base) |
| 2571 | { |
| 2572 | return (struct xe_oa_regs) { |
| 2573 | base, |
| 2574 | OAM_HEAD_POINTER(base), |
| 2575 | OAM_TAIL_POINTER(base), |
| 2576 | OAM_BUFFER(base), |
| 2577 | OAM_CONTEXT_CONTROL(base), |
| 2578 | OAM_CONTROL(base), |
| 2579 | OAM_DEBUG(base), |
| 2580 | OAM_STATUS(base), |
| 2581 | OAM_CONTROL_COUNTER_SEL_MASK, |
| 2582 | }; |
| 2583 | } |
| 2584 | |
| 2585 | static struct xe_oa_regs __oag_regs(void) |
| 2586 | { |
| 2587 | return (struct xe_oa_regs) { |
| 2588 | 0, |
| 2589 | OAG_OAHEADPTR, |
| 2590 | OAG_OATAILPTR, |
| 2591 | OAG_OABUFFER, |
| 2592 | OAG_OAGLBCTXCTRL, |
| 2593 | OAG_OACONTROL, |
| 2594 | OAG_OA_DEBUG, |
| 2595 | OAG_OASTATUS, |
| 2596 | OAG_OACONTROL_OA_COUNTER_SEL_MASK, |
| 2597 | }; |
| 2598 | } |
| 2599 | |
| 2600 | static void __xe_oa_init_oa_units(struct xe_gt *gt) |
| 2601 | { |
| 2602 | /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */ |
| 2603 | const u32 oam_base_addr[] = { |
| 2604 | [XE_OAM_UNIT_SAG] = 0x13000, |
| 2605 | [XE_OAM_UNIT_SCMI_0] = 0x14000, |
| 2606 | [XE_OAM_UNIT_SCMI_1] = 0x14800, |
| 2607 | }; |
| 2608 | int i, num_units = gt->oa.num_oa_units; |
| 2609 | |
| 2610 | for (i = 0; i < num_units; i++) { |
| 2611 | struct xe_oa_unit *u = >->oa.oa_unit[i]; |
| 2612 | |
| 2613 | if (xe_gt_is_main_type(gt)) { |
| 2614 | u->regs = __oag_regs(); |
| 2615 | u->type = DRM_XE_OA_UNIT_TYPE_OAG; |
| 2616 | } else { |
| 2617 | xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270); |
| 2618 | u->regs = __oam_regs(base: oam_base_addr[i]); |
| 2619 | u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ? |
| 2620 | DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM; |
| 2621 | } |
| 2622 | |
| 2623 | u->gt = gt; |
| 2624 | |
| 2625 | xe_mmio_write32(mmio: >->mmio, reg: u->regs.oa_ctrl, val: 0); |
| 2626 | |
| 2627 | /* Ensure MMIO trigger remains disabled till there is a stream */ |
| 2628 | xe_mmio_write32(mmio: >->mmio, reg: u->regs.oa_debug, |
| 2629 | val: oag_configure_mmio_trigger(NULL, enable: false)); |
| 2630 | |
| 2631 | /* Set oa_unit_ids now to ensure ids remain contiguous */ |
| 2632 | u->oa_unit_id = gt_to_xe(gt)->oa.oa_unit_ids++; |
| 2633 | } |
| 2634 | } |
| 2635 | |
| 2636 | static int xe_oa_init_gt(struct xe_gt *gt) |
| 2637 | { |
| 2638 | u32 num_oa_units = num_oa_units_per_gt(gt); |
| 2639 | struct xe_hw_engine *hwe; |
| 2640 | enum xe_hw_engine_id id; |
| 2641 | struct xe_oa_unit *u; |
| 2642 | |
| 2643 | u = drmm_kcalloc(dev: >_to_xe(gt)->drm, n: num_oa_units, size: sizeof(*u), GFP_KERNEL); |
| 2644 | if (!u) |
| 2645 | return -ENOMEM; |
| 2646 | |
| 2647 | for_each_hw_engine(hwe, gt, id) { |
| 2648 | u32 index = __hwe_oa_unit(hwe); |
| 2649 | |
| 2650 | hwe->oa_unit = NULL; |
| 2651 | if (index < num_oa_units) { |
| 2652 | u[index].num_engines++; |
| 2653 | hwe->oa_unit = &u[index]; |
| 2654 | } |
| 2655 | } |
| 2656 | |
| 2657 | gt->oa.num_oa_units = num_oa_units; |
| 2658 | gt->oa.oa_unit = u; |
| 2659 | |
| 2660 | __xe_oa_init_oa_units(gt); |
| 2661 | |
| 2662 | drmm_mutex_init(>_to_xe(gt)->drm, >->oa.gt_lock); |
| 2663 | |
| 2664 | return 0; |
| 2665 | } |
| 2666 | |
| 2667 | static void xe_oa_print_gt_oa_units(struct xe_gt *gt) |
| 2668 | { |
| 2669 | enum xe_hw_engine_id hwe_id; |
| 2670 | struct xe_hw_engine *hwe; |
| 2671 | struct xe_oa_unit *u; |
| 2672 | char buf[256]; |
| 2673 | int i, n; |
| 2674 | |
| 2675 | for (i = 0; i < gt->oa.num_oa_units; i++) { |
| 2676 | u = >->oa.oa_unit[i]; |
| 2677 | buf[0] = '\0'; |
| 2678 | n = 0; |
| 2679 | |
| 2680 | for_each_hw_engine(hwe, gt, hwe_id) |
| 2681 | if (xe_oa_unit_id(hwe) == u->oa_unit_id) |
| 2682 | n += scnprintf(buf: buf + n, size: sizeof(buf) - n, fmt: "%s " , hwe->name); |
| 2683 | |
| 2684 | xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n" , u->oa_unit_id, u->type, buf); |
| 2685 | } |
| 2686 | } |
| 2687 | |
| 2688 | static void xe_oa_print_oa_units(struct xe_oa *oa) |
| 2689 | { |
| 2690 | struct xe_gt *gt; |
| 2691 | int gt_id; |
| 2692 | |
| 2693 | for_each_gt(gt, oa->xe, gt_id) |
| 2694 | xe_oa_print_gt_oa_units(gt); |
| 2695 | } |
| 2696 | |
| 2697 | static int xe_oa_init_oa_units(struct xe_oa *oa) |
| 2698 | { |
| 2699 | struct xe_gt *gt; |
| 2700 | int i, ret; |
| 2701 | |
| 2702 | /* Needed for OAM implementation here */ |
| 2703 | BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0); |
| 2704 | BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1); |
| 2705 | BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2); |
| 2706 | |
| 2707 | for_each_gt(gt, oa->xe, i) { |
| 2708 | ret = xe_oa_init_gt(gt); |
| 2709 | if (ret) |
| 2710 | return ret; |
| 2711 | } |
| 2712 | |
| 2713 | xe_oa_print_oa_units(oa); |
| 2714 | |
| 2715 | return 0; |
| 2716 | } |
| 2717 | |
| 2718 | static void oa_format_add(struct xe_oa *oa, enum xe_oa_format_name format) |
| 2719 | { |
| 2720 | __set_bit(format, oa->format_mask); |
| 2721 | } |
| 2722 | |
| 2723 | static void xe_oa_init_supported_formats(struct xe_oa *oa) |
| 2724 | { |
| 2725 | if (GRAPHICS_VER(oa->xe) >= 20) { |
| 2726 | /* Xe2+ */ |
| 2727 | oa_format_add(oa, format: XE_OAM_FORMAT_MPEC8u64_B8_C8); |
| 2728 | oa_format_add(oa, format: XE_OAM_FORMAT_MPEC8u32_B8_C8); |
| 2729 | oa_format_add(oa, format: XE_OA_FORMAT_PEC64u64); |
| 2730 | oa_format_add(oa, format: XE_OA_FORMAT_PEC64u64_B8_C8); |
| 2731 | oa_format_add(oa, format: XE_OA_FORMAT_PEC64u32); |
| 2732 | oa_format_add(oa, format: XE_OA_FORMAT_PEC32u64_G1); |
| 2733 | oa_format_add(oa, format: XE_OA_FORMAT_PEC32u32_G1); |
| 2734 | oa_format_add(oa, format: XE_OA_FORMAT_PEC32u64_G2); |
| 2735 | oa_format_add(oa, format: XE_OA_FORMAT_PEC32u32_G2); |
| 2736 | oa_format_add(oa, format: XE_OA_FORMAT_PEC36u64_G1_32_G2_4); |
| 2737 | oa_format_add(oa, format: XE_OA_FORMAT_PEC36u64_G1_4_G2_32); |
| 2738 | } else if (GRAPHICS_VERx100(oa->xe) >= 1270) { |
| 2739 | /* XE_METEORLAKE */ |
| 2740 | oa_format_add(oa, format: XE_OAR_FORMAT_A32u40_A4u32_B8_C8); |
| 2741 | oa_format_add(oa, format: XE_OA_FORMAT_A24u40_A14u32_B8_C8); |
| 2742 | oa_format_add(oa, format: XE_OAC_FORMAT_A24u64_B8_C8); |
| 2743 | oa_format_add(oa, format: XE_OAC_FORMAT_A22u32_R2u32_B8_C8); |
| 2744 | oa_format_add(oa, format: XE_OAM_FORMAT_MPEC8u64_B8_C8); |
| 2745 | oa_format_add(oa, format: XE_OAM_FORMAT_MPEC8u32_B8_C8); |
| 2746 | } else if (GRAPHICS_VERx100(oa->xe) >= 1255) { |
| 2747 | /* XE_DG2, XE_PVC */ |
| 2748 | oa_format_add(oa, format: XE_OAR_FORMAT_A32u40_A4u32_B8_C8); |
| 2749 | oa_format_add(oa, format: XE_OA_FORMAT_A24u40_A14u32_B8_C8); |
| 2750 | oa_format_add(oa, format: XE_OAC_FORMAT_A24u64_B8_C8); |
| 2751 | oa_format_add(oa, format: XE_OAC_FORMAT_A22u32_R2u32_B8_C8); |
| 2752 | } else { |
| 2753 | /* Gen12+ */ |
| 2754 | xe_assert(oa->xe, GRAPHICS_VER(oa->xe) >= 12); |
| 2755 | oa_format_add(oa, format: XE_OA_FORMAT_A12); |
| 2756 | oa_format_add(oa, format: XE_OA_FORMAT_A12_B8_C8); |
| 2757 | oa_format_add(oa, format: XE_OA_FORMAT_A32u40_A4u32_B8_C8); |
| 2758 | oa_format_add(oa, format: XE_OA_FORMAT_C4_B8); |
| 2759 | } |
| 2760 | } |
| 2761 | |
/*
 * idr_for_each() callback used by xe_oa_fini(): drops one reference on the OA
 * config stored at the visited entry. @id and @data are unused.
 *
 * Return: always 0, so the iteration visits every entry.
 */
static int destroy_config(int id, void *p, void *data)
{
	xe_oa_config_put(p);

	return 0;
}
| 2768 | |
| 2769 | static void xe_oa_fini(void *arg) |
| 2770 | { |
| 2771 | struct xe_device *xe = arg; |
| 2772 | struct xe_oa *oa = &xe->oa; |
| 2773 | |
| 2774 | if (!oa->xe) |
| 2775 | return; |
| 2776 | |
| 2777 | idr_for_each(&oa->metrics_idr, fn: destroy_config, data: oa); |
| 2778 | idr_destroy(&oa->metrics_idr); |
| 2779 | |
| 2780 | oa->xe = NULL; |
| 2781 | } |
| 2782 | |
| 2783 | /** |
| 2784 | * xe_oa_init - OA initialization during device probe |
| 2785 | * @xe: @xe_device |
| 2786 | * |
| 2787 | * Return: 0 on success or a negative error code on failure |
| 2788 | */ |
| 2789 | int xe_oa_init(struct xe_device *xe) |
| 2790 | { |
| 2791 | struct xe_oa *oa = &xe->oa; |
| 2792 | int ret; |
| 2793 | |
| 2794 | /* Support OA only with GuC submission and Gen12+ */ |
| 2795 | if (!xe_device_uc_enabled(xe) || GRAPHICS_VER(xe) < 12) |
| 2796 | return 0; |
| 2797 | |
| 2798 | if (IS_SRIOV_VF(xe)) |
| 2799 | return 0; |
| 2800 | |
| 2801 | oa->xe = xe; |
| 2802 | oa->oa_formats = oa_formats; |
| 2803 | |
| 2804 | drmm_mutex_init(&oa->xe->drm, &oa->metrics_lock); |
| 2805 | idr_init_base(idr: &oa->metrics_idr, base: 1); |
| 2806 | |
| 2807 | ret = xe_oa_init_oa_units(oa); |
| 2808 | if (ret) { |
| 2809 | drm_err(&xe->drm, "OA initialization failed (%pe)\n" , ERR_PTR(ret)); |
| 2810 | goto exit; |
| 2811 | } |
| 2812 | |
| 2813 | xe_oa_init_supported_formats(oa); |
| 2814 | |
| 2815 | return devm_add_action_or_reset(xe->drm.dev, xe_oa_fini, xe); |
| 2816 | |
| 2817 | exit: |
| 2818 | oa->xe = NULL; |
| 2819 | return ret; |
| 2820 | } |
| 2821 | |