/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/kref.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"

#include "i915_selftest.h"

#include "igt_flush_test.h"
#include "lib_sw_fence.h"

#define TEST_OA_CONFIG_UUID "12345678-1234-1234-1234-1234567890ab"
| 18 | |
| 19 | static int |
| 20 | alloc_empty_config(struct i915_perf *perf) |
| 21 | { |
| 22 | struct i915_oa_config *oa_config; |
| 23 | |
| 24 | oa_config = kzalloc(sizeof(*oa_config), GFP_KERNEL); |
| 25 | if (!oa_config) |
| 26 | return -ENOMEM; |
| 27 | |
| 28 | oa_config->perf = perf; |
| 29 | kref_init(kref: &oa_config->ref); |
| 30 | |
| 31 | strscpy(oa_config->uuid, TEST_OA_CONFIG_UUID, sizeof(oa_config->uuid)); |
| 32 | |
| 33 | mutex_lock(&perf->metrics_lock); |
| 34 | |
| 35 | oa_config->id = idr_alloc(&perf->metrics_idr, ptr: oa_config, start: 2, end: 0, GFP_KERNEL); |
| 36 | if (oa_config->id < 0) { |
| 37 | mutex_unlock(lock: &perf->metrics_lock); |
| 38 | i915_oa_config_put(oa_config); |
| 39 | return -ENOMEM; |
| 40 | } |
| 41 | |
| 42 | mutex_unlock(lock: &perf->metrics_lock); |
| 43 | |
| 44 | return 0; |
| 45 | } |
| 46 | |
| 47 | static void |
| 48 | destroy_empty_config(struct i915_perf *perf) |
| 49 | { |
| 50 | struct i915_oa_config *oa_config = NULL, *tmp; |
| 51 | int id; |
| 52 | |
| 53 | mutex_lock(&perf->metrics_lock); |
| 54 | |
| 55 | idr_for_each_entry(&perf->metrics_idr, tmp, id) { |
| 56 | if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) { |
| 57 | oa_config = tmp; |
| 58 | break; |
| 59 | } |
| 60 | } |
| 61 | |
| 62 | if (oa_config) |
| 63 | idr_remove(&perf->metrics_idr, id: oa_config->id); |
| 64 | |
| 65 | mutex_unlock(lock: &perf->metrics_lock); |
| 66 | |
| 67 | if (oa_config) |
| 68 | i915_oa_config_put(oa_config); |
| 69 | } |
| 70 | |
| 71 | static struct i915_oa_config * |
| 72 | get_empty_config(struct i915_perf *perf) |
| 73 | { |
| 74 | struct i915_oa_config *oa_config = NULL, *tmp; |
| 75 | int id; |
| 76 | |
| 77 | mutex_lock(&perf->metrics_lock); |
| 78 | |
| 79 | idr_for_each_entry(&perf->metrics_idr, tmp, id) { |
| 80 | if (!strcmp(tmp->uuid, TEST_OA_CONFIG_UUID)) { |
| 81 | oa_config = i915_oa_config_get(oa_config: tmp); |
| 82 | break; |
| 83 | } |
| 84 | } |
| 85 | |
| 86 | mutex_unlock(lock: &perf->metrics_lock); |
| 87 | |
| 88 | return oa_config; |
| 89 | } |
| 90 | |
| 91 | static struct i915_perf_stream * |
| 92 | test_stream(struct i915_perf *perf) |
| 93 | { |
| 94 | struct drm_i915_perf_open_param param = {}; |
| 95 | struct i915_oa_config *oa_config = get_empty_config(perf); |
| 96 | struct perf_open_properties props = { |
| 97 | .engine = intel_engine_lookup_user(i915: perf->i915, |
| 98 | class: I915_ENGINE_CLASS_RENDER, |
| 99 | instance: 0), |
| 100 | .sample_flags = SAMPLE_OA_REPORT, |
| 101 | .oa_format = GRAPHICS_VER(perf->i915) == 12 ? |
| 102 | I915_OA_FORMAT_A32u40_A4u32_B8_C8 : I915_OA_FORMAT_C4_B8, |
| 103 | }; |
| 104 | struct i915_perf_stream *stream; |
| 105 | struct intel_gt *gt; |
| 106 | |
| 107 | if (!props.engine) |
| 108 | return NULL; |
| 109 | |
| 110 | gt = props.engine->gt; |
| 111 | |
| 112 | if (!oa_config) |
| 113 | return NULL; |
| 114 | |
| 115 | props.metrics_set = oa_config->id; |
| 116 | |
| 117 | stream = kzalloc(sizeof(*stream), GFP_KERNEL); |
| 118 | if (!stream) { |
| 119 | i915_oa_config_put(oa_config); |
| 120 | return NULL; |
| 121 | } |
| 122 | |
| 123 | stream->perf = perf; |
| 124 | |
| 125 | mutex_lock(>->perf.lock); |
| 126 | if (i915_oa_stream_init(stream, param: ¶m, props: &props)) { |
| 127 | kfree(objp: stream); |
| 128 | stream = NULL; |
| 129 | } |
| 130 | mutex_unlock(lock: >->perf.lock); |
| 131 | |
| 132 | i915_oa_config_put(oa_config); |
| 133 | |
| 134 | return stream; |
| 135 | } |
| 136 | |
| 137 | static void stream_destroy(struct i915_perf_stream *stream) |
| 138 | { |
| 139 | struct intel_gt *gt = stream->engine->gt; |
| 140 | |
| 141 | mutex_lock(>->perf.lock); |
| 142 | i915_perf_destroy_locked(stream); |
| 143 | mutex_unlock(lock: >->perf.lock); |
| 144 | } |
| 145 | |
| 146 | static int live_sanitycheck(void *arg) |
| 147 | { |
| 148 | struct drm_i915_private *i915 = arg; |
| 149 | struct i915_perf_stream *stream; |
| 150 | |
| 151 | /* Quick check we can create a perf stream */ |
| 152 | |
| 153 | stream = test_stream(perf: &i915->perf); |
| 154 | if (!stream) |
| 155 | return -EINVAL; |
| 156 | |
| 157 | stream_destroy(stream); |
| 158 | return 0; |
| 159 | } |
| 160 | |
| 161 | static int write_timestamp(struct i915_request *rq, int slot) |
| 162 | { |
| 163 | u32 *cs; |
| 164 | int len; |
| 165 | |
| 166 | cs = intel_ring_begin(rq, num_dwords: 6); |
| 167 | if (IS_ERR(ptr: cs)) |
| 168 | return PTR_ERR(ptr: cs); |
| 169 | |
| 170 | len = 5; |
| 171 | if (GRAPHICS_VER(rq->i915) >= 8) |
| 172 | len++; |
| 173 | |
| 174 | *cs++ = GFX_OP_PIPE_CONTROL(len); |
| 175 | *cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | |
| 176 | PIPE_CONTROL_STORE_DATA_INDEX | |
| 177 | PIPE_CONTROL_WRITE_TIMESTAMP; |
| 178 | *cs++ = slot * sizeof(u32); |
| 179 | *cs++ = 0; |
| 180 | *cs++ = 0; |
| 181 | *cs++ = 0; |
| 182 | |
| 183 | intel_ring_advance(rq, cs); |
| 184 | |
| 185 | return 0; |
| 186 | } |
| 187 | |
| 188 | static ktime_t poll_status(struct i915_request *rq, int slot) |
| 189 | { |
| 190 | while (!intel_read_status_page(engine: rq->engine, reg: slot) && |
| 191 | !i915_request_completed(rq)) |
| 192 | cpu_relax(); |
| 193 | |
| 194 | return ktime_get(); |
| 195 | } |
| 196 | |
| 197 | static int live_noa_delay(void *arg) |
| 198 | { |
| 199 | struct drm_i915_private *i915 = arg; |
| 200 | struct i915_perf_stream *stream; |
| 201 | struct i915_request *rq; |
| 202 | ktime_t t0, t1; |
| 203 | u64 expected; |
| 204 | u32 delay; |
| 205 | int err; |
| 206 | int i; |
| 207 | |
| 208 | /* Check that the GPU delays matches expectations */ |
| 209 | |
| 210 | stream = test_stream(perf: &i915->perf); |
| 211 | if (!stream) |
| 212 | return -ENOMEM; |
| 213 | |
| 214 | expected = atomic64_read(v: &stream->perf->noa_programming_delay); |
| 215 | |
| 216 | if (stream->engine->class != RENDER_CLASS) { |
| 217 | err = -ENODEV; |
| 218 | goto out; |
| 219 | } |
| 220 | |
| 221 | for (i = 0; i < 4; i++) |
| 222 | intel_write_status_page(engine: stream->engine, reg: 0x100 + i, value: 0); |
| 223 | |
| 224 | rq = intel_engine_create_kernel_request(engine: stream->engine); |
| 225 | if (IS_ERR(ptr: rq)) { |
| 226 | err = PTR_ERR(ptr: rq); |
| 227 | goto out; |
| 228 | } |
| 229 | |
| 230 | if (rq->engine->emit_init_breadcrumb) { |
| 231 | err = rq->engine->emit_init_breadcrumb(rq); |
| 232 | if (err) { |
| 233 | i915_request_add(rq); |
| 234 | goto out; |
| 235 | } |
| 236 | } |
| 237 | |
| 238 | err = write_timestamp(rq, slot: 0x100); |
| 239 | if (err) { |
| 240 | i915_request_add(rq); |
| 241 | goto out; |
| 242 | } |
| 243 | |
| 244 | err = rq->engine->emit_bb_start(rq, |
| 245 | i915_ggtt_offset(vma: stream->noa_wait), 0, |
| 246 | I915_DISPATCH_SECURE); |
| 247 | if (err) { |
| 248 | i915_request_add(rq); |
| 249 | goto out; |
| 250 | } |
| 251 | |
| 252 | err = write_timestamp(rq, slot: 0x102); |
| 253 | if (err) { |
| 254 | i915_request_add(rq); |
| 255 | goto out; |
| 256 | } |
| 257 | |
| 258 | i915_request_get(rq); |
| 259 | i915_request_add(rq); |
| 260 | |
| 261 | preempt_disable(); |
| 262 | t0 = poll_status(rq, slot: 0x100); |
| 263 | t1 = poll_status(rq, slot: 0x102); |
| 264 | preempt_enable(); |
| 265 | |
| 266 | pr_info("CPU delay: %lluns, expected %lluns\n" , |
| 267 | ktime_sub(t1, t0), expected); |
| 268 | |
| 269 | delay = intel_read_status_page(engine: stream->engine, reg: 0x102); |
| 270 | delay -= intel_read_status_page(engine: stream->engine, reg: 0x100); |
| 271 | delay = intel_gt_clock_interval_to_ns(gt: stream->engine->gt, count: delay); |
| 272 | pr_info("GPU delay: %uns, expected %lluns\n" , |
| 273 | delay, expected); |
| 274 | |
| 275 | if (4 * delay < 3 * expected || 2 * delay > 3 * expected) { |
| 276 | pr_err("GPU delay [%uus] outside of expected threshold! [%lluus, %lluus]\n" , |
| 277 | delay / 1000, |
| 278 | div_u64(3 * expected, 4000), |
| 279 | div_u64(3 * expected, 2000)); |
| 280 | err = -EINVAL; |
| 281 | } |
| 282 | |
| 283 | i915_request_put(rq); |
| 284 | out: |
| 285 | stream_destroy(stream); |
| 286 | return err; |
| 287 | } |
| 288 | |
| 289 | static int live_noa_gpr(void *arg) |
| 290 | { |
| 291 | struct drm_i915_private *i915 = arg; |
| 292 | struct i915_perf_stream *stream; |
| 293 | struct intel_context *ce; |
| 294 | struct i915_request *rq; |
| 295 | u32 *cs, *store; |
| 296 | void *scratch; |
| 297 | u32 gpr0; |
| 298 | int err; |
| 299 | int i; |
| 300 | |
| 301 | /* Check that the delay does not clobber user context state (GPR) */ |
| 302 | |
| 303 | stream = test_stream(perf: &i915->perf); |
| 304 | if (!stream) |
| 305 | return -ENOMEM; |
| 306 | |
| 307 | gpr0 = i915_mmio_reg_offset(GEN8_RING_CS_GPR(stream->engine->mmio_base, 0)); |
| 308 | |
| 309 | ce = intel_context_create(engine: stream->engine); |
| 310 | if (IS_ERR(ptr: ce)) { |
| 311 | err = PTR_ERR(ptr: ce); |
| 312 | goto out; |
| 313 | } |
| 314 | |
| 315 | /* Poison the ce->vm so we detect writes not to the GGTT gt->scratch */ |
| 316 | scratch = __px_vaddr(p: ce->vm->scratch[0]); |
| 317 | memset(scratch, POISON_FREE, PAGE_SIZE); |
| 318 | |
| 319 | rq = intel_context_create_request(ce); |
| 320 | if (IS_ERR(ptr: rq)) { |
| 321 | err = PTR_ERR(ptr: rq); |
| 322 | goto out_ce; |
| 323 | } |
| 324 | i915_request_get(rq); |
| 325 | |
| 326 | if (rq->engine->emit_init_breadcrumb) { |
| 327 | err = rq->engine->emit_init_breadcrumb(rq); |
| 328 | if (err) { |
| 329 | i915_request_add(rq); |
| 330 | goto out_rq; |
| 331 | } |
| 332 | } |
| 333 | |
| 334 | /* Fill the 16 qword [32 dword] GPR with a known unlikely value */ |
| 335 | cs = intel_ring_begin(rq, num_dwords: 2 * 32 + 2); |
| 336 | if (IS_ERR(ptr: cs)) { |
| 337 | err = PTR_ERR(ptr: cs); |
| 338 | i915_request_add(rq); |
| 339 | goto out_rq; |
| 340 | } |
| 341 | |
| 342 | *cs++ = MI_LOAD_REGISTER_IMM(32); |
| 343 | for (i = 0; i < 32; i++) { |
| 344 | *cs++ = gpr0 + i * sizeof(u32); |
| 345 | *cs++ = STACK_MAGIC; |
| 346 | } |
| 347 | *cs++ = MI_NOOP; |
| 348 | intel_ring_advance(rq, cs); |
| 349 | |
| 350 | /* Execute the GPU delay */ |
| 351 | err = rq->engine->emit_bb_start(rq, |
| 352 | i915_ggtt_offset(vma: stream->noa_wait), 0, |
| 353 | I915_DISPATCH_SECURE); |
| 354 | if (err) { |
| 355 | i915_request_add(rq); |
| 356 | goto out_rq; |
| 357 | } |
| 358 | |
| 359 | /* Read the GPR back, using the pinned global HWSP for convenience */ |
| 360 | store = memset32(s: rq->engine->status_page.addr + 512, v: 0, n: 32); |
| 361 | for (i = 0; i < 32; i++) { |
| 362 | u32 cmd; |
| 363 | |
| 364 | cs = intel_ring_begin(rq, num_dwords: 4); |
| 365 | if (IS_ERR(ptr: cs)) { |
| 366 | err = PTR_ERR(ptr: cs); |
| 367 | i915_request_add(rq); |
| 368 | goto out_rq; |
| 369 | } |
| 370 | |
| 371 | cmd = MI_STORE_REGISTER_MEM; |
| 372 | if (GRAPHICS_VER(i915) >= 8) |
| 373 | cmd++; |
| 374 | cmd |= MI_USE_GGTT; |
| 375 | |
| 376 | *cs++ = cmd; |
| 377 | *cs++ = gpr0 + i * sizeof(u32); |
| 378 | *cs++ = i915_ggtt_offset(vma: rq->engine->status_page.vma) + |
| 379 | offset_in_page(store) + |
| 380 | i * sizeof(u32); |
| 381 | *cs++ = 0; |
| 382 | intel_ring_advance(rq, cs); |
| 383 | } |
| 384 | |
| 385 | i915_request_add(rq); |
| 386 | |
| 387 | if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, HZ / 2) < 0) { |
| 388 | pr_err("noa_wait timed out\n" ); |
| 389 | intel_gt_set_wedged(gt: stream->engine->gt); |
| 390 | err = -EIO; |
| 391 | goto out_rq; |
| 392 | } |
| 393 | |
| 394 | /* Verify that the GPR contain our expected values */ |
| 395 | for (i = 0; i < 32; i++) { |
| 396 | if (store[i] == STACK_MAGIC) |
| 397 | continue; |
| 398 | |
| 399 | pr_err("GPR[%d] lost, found:%08x, expected:%08x!\n" , |
| 400 | i, store[i], STACK_MAGIC); |
| 401 | err = -EINVAL; |
| 402 | } |
| 403 | |
| 404 | /* Verify that the user's scratch page was not used for GPR storage */ |
| 405 | if (memchr_inv(p: scratch, POISON_FREE, PAGE_SIZE)) { |
| 406 | pr_err("Scratch page overwritten!\n" ); |
| 407 | igt_hexdump(buf: scratch, len: 4096); |
| 408 | err = -EINVAL; |
| 409 | } |
| 410 | |
| 411 | out_rq: |
| 412 | i915_request_put(rq); |
| 413 | out_ce: |
| 414 | intel_context_put(ce); |
| 415 | out: |
| 416 | stream_destroy(stream); |
| 417 | return err; |
| 418 | } |
| 419 | |
| 420 | int i915_perf_live_selftests(struct drm_i915_private *i915) |
| 421 | { |
| 422 | static const struct i915_subtest tests[] = { |
| 423 | SUBTEST(live_sanitycheck), |
| 424 | SUBTEST(live_noa_delay), |
| 425 | SUBTEST(live_noa_gpr), |
| 426 | }; |
| 427 | struct i915_perf *perf = &i915->perf; |
| 428 | int err; |
| 429 | |
| 430 | if (!perf->metrics_kobj || !perf->ops.enable_metric_set) |
| 431 | return 0; |
| 432 | |
| 433 | if (intel_gt_is_wedged(gt: to_gt(i915))) |
| 434 | return 0; |
| 435 | |
| 436 | err = alloc_empty_config(perf: &i915->perf); |
| 437 | if (err) |
| 438 | return err; |
| 439 | |
| 440 | err = i915_live_subtests(tests, i915); |
| 441 | |
| 442 | destroy_empty_config(perf: &i915->perf); |
| 443 | |
| 444 | return err; |
| 445 | } |
| 446 | |