1// SPDX-License-Identifier: MIT
2/*
3 * Copyright © 2018 Intel Corporation
4 */
5
6#include <linux/prime_numbers.h>
7
8#include "gem/i915_gem_internal.h"
9
10#include "i915_drv.h"
11#include "i915_selftest.h"
12#include "intel_engine_heartbeat.h"
13#include "intel_engine_pm.h"
14#include "intel_reset.h"
15#include "intel_ring.h"
16#include "selftest_engine_heartbeat.h"
17#include "selftests/i915_random.h"
18#include "selftests/igt_flush_test.h"
19#include "selftests/igt_live_test.h"
20#include "selftests/igt_spinner.h"
21#include "selftests/lib_sw_fence.h"
22#include "shmem_utils.h"
23
24#include "gem/selftests/igt_gem_utils.h"
25#include "gem/selftests/mock_context.h"
26
27#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4)
28#define NUM_GPR 16
29#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */
30
31#define LRI_HEADER MI_INSTR(0x22, 0)
32#define LRI_LENGTH_MASK GENMASK(7, 0)
33
34static struct i915_vma *create_scratch(struct intel_gt *gt)
35{
36 return __vm_create_scratch_for_read_pinned(vm: &gt->ggtt->vm, PAGE_SIZE);
37}
38
39static bool is_active(struct i915_request *rq)
40{
41 if (i915_request_is_active(rq))
42 return true;
43
44 if (i915_request_on_hold(rq))
45 return true;
46
47 if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq))
48 return true;
49
50 return false;
51}
52
53static int wait_for_submit(struct intel_engine_cs *engine,
54 struct i915_request *rq,
55 unsigned long timeout)
56{
57 /* Ignore our own attempts to suppress excess tasklets */
58 tasklet_hi_schedule(t: &engine->sched_engine->tasklet);
59
60 timeout += jiffies;
61 do {
62 bool done = time_after(jiffies, timeout);
63
64 if (i915_request_completed(rq)) /* that was quick! */
65 return 0;
66
67 /* Wait until the HW has acknowledged the submission (or err) */
68 intel_engine_flush_submission(engine);
69 if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq))
70 return 0;
71
72 if (done)
73 return -ETIME;
74
75 cond_resched();
76 } while (1);
77}
78
/*
 * Submit a request on @ce that writes 1 into the semaphore dword @slot
 * (which lives inside the engine's status page), releasing any batch
 * polling on it with MI_SEMAPHORE_SAD_NEQ_SDD. Returns 0 or -errno.
 */
static int emit_semaphore_signal(struct intel_context *ce, void *slot)
{
	/* GGTT address of @slot within the engine's status page */
	const u32 offset =
		i915_ggtt_offset(vma: ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq))
		return PTR_ERR(ptr: rq);

	cs = intel_ring_begin(rq, num_dwords: 4);
	if (IS_ERR(ptr: cs)) {
		i915_request_add(rq);
		return PTR_ERR(ptr: cs);
	}

	/* *offset = 1, via a GGTT-addressed store */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = offset;
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	/* Run ahead of everything, so the signal is not itself blocked */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);
	return 0;
}
108
/*
 * Force a switch away from @ce so that its register state is saved back
 * to memory: submit a kernel-context request ordered after the last
 * request on @ce's timeline and wait up to @timeout jiffies for it.
 * Returns 0 on success, -ETIME if the wait timed out.
 */
static int context_flush(struct intel_context *ce, long timeout)
{
	struct i915_request *rq;
	struct dma_fence *fence;
	int err = 0;

	rq = intel_engine_create_kernel_request(engine: ce->engine);
	if (IS_ERR(ptr: rq))
		return PTR_ERR(ptr: rq);

	/* Order the flush after whatever is currently queued on @ce */
	fence = i915_active_fence_get(active: &ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, flags: 0, timeout) < 0)
		err = -ETIME;
	i915_request_put(rq);

	rmb(); /* We know the request is written, make sure all state is too! */
	return err;
}
134
135static int get_lri_mask(struct intel_engine_cs *engine, u32 lri)
136{
137 if ((lri & MI_LRI_LRM_CS_MMIO) == 0)
138 return ~0u;
139
140 if (GRAPHICS_VER(engine->i915) < 12)
141 return 0xfff;
142
143 switch (engine->class) {
144 default:
145 case RENDER_CLASS:
146 case COMPUTE_CLASS:
147 return 0x07ff;
148 case COPY_ENGINE_CLASS:
149 return 0x0fff;
150 case VIDEO_DECODE_CLASS:
151 case VIDEO_ENHANCEMENT_CLASS:
152 return 0x3fff;
153 }
154}
155
static int live_lrc_layout(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 *lrc;
	int err;

	/*
	 * Check the registers offsets we use to create the initial reg state
	 * match the layout saved by HW.
	 */

	lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */
	if (!lrc)
		return -ENOMEM;
	GEM_BUG_ON(offset_in_page(lrc));

	err = 0;
	for_each_engine(engine, gt, id) {
		u32 *hw;
		int dw;

		/* Need the HW-recorded default context image to compare against */
		if (!engine->default_state)
			continue;

		hw = shmem_pin_map(file: engine->default_state);
		if (!hw) {
			err = -ENOMEM;
			break;
		}
		hw += LRC_STATE_OFFSET / sizeof(*hw);

		/* Build SW's idea of the initial register state into @lrc */
		__lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE),
				ce: engine->kernel_context, engine, inhibit: true);

		/* Walk both images in lockstep, LRI by LRI */
		dw = 0;
		do {
			u32 lri = READ_ONCE(hw[dw]);
			u32 lri_mask;

			if (lri == 0) {
				dw++;
				continue;
			}

			/* SW may deliberately nop out an instruction HW emits */
			if (lrc[dw] == 0) {
				pr_debug("%s: skipped instruction %x at dword %d\n",
					 engine->name, lri, dw);
				dw++;
				continue;
			}

			if ((lri & GENMASK(31, 23)) != LRI_HEADER) {
				pr_err("%s: Expected LRI command at dword %d, found %08x\n",
				       engine->name, dw, lri);
				err = -EINVAL;
				break;
			}

			if (lrc[dw] != lri) {
				pr_err("%s: LRI command mismatch at dword %d, expected %08x found %08x\n",
				       engine->name, dw, lri, lrc[dw]);
				err = -EINVAL;
				break;
			}

			/*
			 * When bit 19 of MI_LOAD_REGISTER_IMM instruction
			 * opcode is set on Gen12+ devices, HW does not
			 * care about certain register address offsets, and
			 * instead check the following for valid address
			 * ranges on specific engines:
			 * RCS && CCS: BITS(0 - 10)
			 * BCS: BITS(0 - 11)
			 * VECS && VCS: BITS(0 - 13)
			 */
			lri_mask = get_lri_mask(engine, lri);

			/* Decode the LRI payload length (dword count) */
			lri &= 0x7f;
			lri++;
			dw++;

			/* Compare each (register, value) pair's register offset */
			while (lri) {
				u32 offset = READ_ONCE(hw[dw]);

				if ((offset ^ lrc[dw]) & lri_mask) {
					pr_err("%s: Different registers found at dword %d, expected %x, found %x\n",
					       engine->name, dw, offset, lrc[dw]);
					err = -EINVAL;
					break;
				}

				/*
				 * Skip over the actual register value as we
				 * expect that to differ.
				 */
				dw += 2;
				lri -= 2;
			}
		} while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

		if (err) {
			pr_info("%s: HW register image:\n", engine->name);
			igt_hexdump(buf: hw, PAGE_SIZE);

			pr_info("%s: SW register image:\n", engine->name);
			igt_hexdump(buf: lrc, PAGE_SIZE);
		}

		shmem_unpin_map(file: engine->default_state, ptr: hw);
		if (err)
			break;
	}

	free_page((unsigned long)lrc);
	return err;
}
274
275static int find_offset(const u32 *lri, u32 offset)
276{
277 int i;
278
279 for (i = 0; i < PAGE_SIZE / sizeof(u32); i++)
280 if (lri[i] == offset)
281 return i;
282
283 return -1;
284}
285
static int live_lrc_fixed(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check the assumed register offsets match the actual locations in
	 * the context image.
	 */

	for_each_engine(engine, gt, id) {
		/* Table of (mmio register, expected image offset, name) */
		const struct {
			u32 reg;
			u32 offset;
			const char *name;
		} tbl[] = {
			{
				i915_mmio_reg_offset(RING_START(engine->mmio_base)),
				CTX_RING_START - 1,
				"RING_START"
			},
			{
				i915_mmio_reg_offset(RING_CTL(engine->mmio_base)),
				CTX_RING_CTL - 1,
				"RING_CTL"
			},
			{
				i915_mmio_reg_offset(RING_HEAD(engine->mmio_base)),
				CTX_RING_HEAD - 1,
				"RING_HEAD"
			},
			{
				i915_mmio_reg_offset(RING_TAIL(engine->mmio_base)),
				CTX_RING_TAIL - 1,
				"RING_TAIL"
			},
			{
				i915_mmio_reg_offset(RING_MI_MODE(engine->mmio_base)),
				lrc_ring_mi_mode(engine),
				"RING_MI_MODE"
			},
			{
				i915_mmio_reg_offset(RING_BBSTATE(engine->mmio_base)),
				CTX_BB_STATE - 1,
				"BB_STATE"
			},
			{
				i915_mmio_reg_offset(RING_BB_PER_CTX_PTR(engine->mmio_base)),
				lrc_ring_wa_bb_per_ctx(engine),
				"RING_BB_PER_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX(engine->mmio_base)),
				lrc_ring_indirect_ptr(engine),
				"RING_INDIRECT_CTX_PTR"
			},
			{
				i915_mmio_reg_offset(RING_INDIRECT_CTX_OFFSET(engine->mmio_base)),
				lrc_ring_indirect_offset(engine),
				"RING_INDIRECT_CTX_OFFSET"
			},
			{
				i915_mmio_reg_offset(RING_CTX_TIMESTAMP(engine->mmio_base)),
				CTX_TIMESTAMP - 1,
				"RING_CTX_TIMESTAMP"
			},
			{
				i915_mmio_reg_offset(GEN8_RING_CS_GPR(engine->mmio_base, 0)),
				lrc_ring_gpr0(engine),
				"RING_CS_GPR0"
			},
			{
				i915_mmio_reg_offset(RING_CMD_BUF_CCTL(engine->mmio_base)),
				lrc_ring_cmd_buf_cctl(engine),
				"RING_CMD_BUF_CCTL"
			},
			{
				i915_mmio_reg_offset(RING_BB_OFFSET(engine->mmio_base)),
				lrc_ring_bb_offset(engine),
				"RING_BB_OFFSET"
			},
			{ }, /* sentinel: name == NULL terminates the walk */
		}, *t;
		u32 *hw;

		/* Need the HW-recorded default context image to search */
		if (!engine->default_state)
			continue;

		hw = shmem_pin_map(file: engine->default_state);
		if (!hw) {
			err = -ENOMEM;
			break;
		}
		hw += LRC_STATE_OFFSET / sizeof(*hw);

		/* Locate each register in the HW image; -1 means not found */
		for (t = tbl; t->name; t++) {
			int dw = find_offset(lri: hw, offset: t->reg);

			if (dw != t->offset) {
				pr_err("%s: Offset for %s [0x%x] mismatch, found %x, expected %x\n",
				       engine->name,
				       t->name,
				       t->reg,
				       dw,
				       t->offset);
				err = -EINVAL;
			}
		}

		shmem_unpin_map(file: engine->default_state, ptr: hw);
	}

	return err;
}
402
/*
 * Submit a request that reads RING_START and RING_TAIL back from the HW
 * into @scratch, then compare against the values SW programmed into the
 * context. Uses the ww locking dance so the scratch object and context
 * pin can be retried together on -EDEADLK.
 */
static int __live_lrc_state(struct intel_engine_cs *engine,
			    struct i915_vma *scratch)
{
	struct intel_context *ce;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	enum {
		RING_START_IDX = 0,
		RING_TAIL_IDX,
		MAX_IDX
	};
	u32 expected[MAX_IDX];
	u32 *cs;
	int err;
	int n;

	ce = intel_context_create(engine);
	if (IS_ERR(ptr: ce))
		return PTR_ERR(ptr: ce);

	i915_gem_ww_ctx_init(ctx: &ww, intr: false);
retry:
	err = i915_gem_object_lock(obj: scratch->obj, ww: &ww);
	if (!err)
		err = intel_context_pin_ww(ce, ww: &ww);
	if (err)
		goto err_put;

	rq = i915_request_create(ce);
	if (IS_ERR(ptr: rq)) {
		err = PTR_ERR(ptr: rq);
		goto err_unpin;
	}

	/* 4 dwords of MI_STORE_REGISTER_MEM per register sampled */
	cs = intel_ring_begin(rq, num_dwords: 4 * MAX_IDX);
	if (IS_ERR(ptr: cs)) {
		err = PTR_ERR(ptr: cs);
		i915_request_add(rq);
		goto err_unpin;
	}

	/* Sample RING_START into scratch[RING_START_IDX] */
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_START(engine->mmio_base));
	*cs++ = i915_ggtt_offset(vma: scratch) + RING_START_IDX * sizeof(u32);
	*cs++ = 0;

	/* HW should report the GGTT address of this context's ring */
	expected[RING_START_IDX] = i915_ggtt_offset(vma: ce->ring->vma);

	/* Sample RING_TAIL into scratch[RING_TAIL_IDX] */
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_TAIL(engine->mmio_base));
	*cs++ = i915_ggtt_offset(vma: scratch) + RING_TAIL_IDX * sizeof(u32);
	*cs++ = 0;

	err = i915_vma_move_to_active(vma: scratch, rq, EXEC_OBJECT_WRITE);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err)
		goto err_rq;

	/* Sample the final tail only after the request has been submitted */
	intel_engine_flush_submission(engine);
	expected[RING_TAIL_IDX] = ce->ring->tail;

	if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map(obj: scratch->obj, type: I915_MAP_WB);
	if (IS_ERR(ptr: cs)) {
		err = PTR_ERR(ptr: cs);
		goto err_rq;
	}

	/* Compare the HW-written values against our expectations */
	for (n = 0; n < MAX_IDX; n++) {
		if (cs[n] != expected[n]) {
			pr_err("%s: Stored register[%d] value[0x%x] did not match expected[0x%x]\n",
			       engine->name, n, cs[n], expected[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(obj: scratch->obj);

err_rq:
	i915_request_put(rq);
err_unpin:
	intel_context_unpin(ce);
err_put:
	/* ww backoff: drop all locks and retry the whole sequence */
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(ctx: &ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(ctx: &ww);
	intel_context_put(ce);
	return err;
}
502
503static int live_lrc_state(void *arg)
504{
505 struct intel_gt *gt = arg;
506 struct intel_engine_cs *engine;
507 struct i915_vma *scratch;
508 enum intel_engine_id id;
509 int err = 0;
510
511 /*
512 * Check the live register state matches what we expect for this
513 * intel_context.
514 */
515
516 scratch = create_scratch(gt);
517 if (IS_ERR(ptr: scratch))
518 return PTR_ERR(ptr: scratch);
519
520 for_each_engine(engine, gt, id) {
521 err = __live_lrc_state(engine, scratch);
522 if (err)
523 break;
524 }
525
526 if (igt_flush_test(i915: gt->i915))
527 err = -EIO;
528
529 i915_vma_unpin_and_release(p_vma: &scratch, flags: 0);
530 return err;
531}
532
/*
 * Fill every CS general-purpose register on @ce's engine with STACK_MAGIC
 * using one large MI_LOAD_REGISTER_IMM, so that a later readback can tell
 * whether a fresh context leaked the previous context's GPR values.
 */
static int gpr_make_dirty(struct intel_context *ce)
{
	struct i915_request *rq;
	u32 *cs;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq))
		return PTR_ERR(ptr: rq);

	/* 1 LRI header + (reg, value) pair per dword + trailing MI_NOOP */
	cs = intel_ring_begin(rq, num_dwords: 2 * NUM_GPR_DW + 2);
	if (IS_ERR(ptr: cs)) {
		i915_request_add(rq);
		return PTR_ERR(ptr: cs);
	}

	*cs++ = MI_LOAD_REGISTER_IMM(NUM_GPR_DW);
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = STACK_MAGIC;
	}
	*cs++ = MI_NOOP;

	intel_ring_advance(rq, cs);

	/* Run ahead of everything else queued on the engine */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
	i915_request_add(rq);

	return 0;
}
563
/*
 * Build and submit a request on @ce that first blocks on the semaphore
 * dword @slot (releases when written non-zero), then copies every CS GPR
 * into @scratch via MI_STORE_REGISTER_MEM for later inspection.
 * Returns the request (with a reference held) or an ERR_PTR.
 */
static struct i915_request *
__gpr_read(struct intel_context *ce, struct i915_vma *scratch, u32 *slot)
{
	/* GGTT address of @slot within the engine's status page */
	const u32 offset =
		i915_ggtt_offset(vma: ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq))
		return rq;

	cs = intel_ring_begin(rq, num_dwords: 6 + 4 * NUM_GPR_DW);
	if (IS_ERR(ptr: cs)) {
		i915_request_add(rq);
		return ERR_CAST(ptr: cs);
	}

	/* Keep arbitration enabled so the wait may be preempted */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Poll until *offset != 0 */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	/* Dump each GPR dword into the scratch buffer */
	for (n = 0; n < NUM_GPR_DW; n++) {
		*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
		*cs++ = CS_GPR(ce->engine, n);
		*cs++ = i915_ggtt_offset(vma: scratch) + n * sizeof(u32);
		*cs++ = 0;
	}

	err = igt_vma_move_to_active_unlocked(vma: scratch, rq, EXEC_OBJECT_WRITE);

	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		rq = ERR_PTR(error: err);
	}

	return rq;
}
614
/*
 * Dirty the GPRs with the kernel context, then run a readback batch in a
 * brand-new context and check every GPR reads back as zero. With @preempt,
 * the new context is preempted by the (re-dirtied) kernel context while
 * waiting on the semaphore, exercising the save/restore path as well.
 */
static int __live_lrc_gpr(struct intel_engine_cs *engine,
			  struct i915_vma *scratch,
			  bool preempt)
{
	/* Semaphore slot carved out of the engine's status page */
	u32 *slot = memset32(s: engine->status_page.addr + 1000, v: 0, n: 4);
	struct intel_context *ce;
	struct i915_request *rq;
	u32 *cs;
	int err;
	int n;

	if (GRAPHICS_VER(engine->i915) < 9 && engine->class != RENDER_CLASS)
		return 0; /* GPR only on rcs0 for gen8 */

	/* Pollute the GPRs from another context first */
	err = gpr_make_dirty(ce: engine->kernel_context);
	if (err)
		return err;

	ce = intel_context_create(engine);
	if (IS_ERR(ptr: ce))
		return PTR_ERR(ptr: ce);

	rq = __gpr_read(ce, scratch, slot);
	if (IS_ERR(ptr: rq)) {
		err = PTR_ERR(ptr: rq);
		goto err_put;
	}

	err = wait_for_submit(engine, rq, HZ / 2);
	if (err)
		goto err_rq;

	if (preempt) {
		/* Dirty again and preempt, then release via the semaphore */
		err = gpr_make_dirty(ce: engine->kernel_context);
		if (err)
			goto err_rq;

		err = emit_semaphore_signal(ce: engine->kernel_context, slot);
		if (err)
			goto err_rq;

		err = wait_for_submit(engine, rq, HZ / 2);
		if (err)
			goto err_rq;
	} else {
		/* Release the semaphore directly from the CPU */
		slot[0] = 1;
		wmb();
	}

	if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	cs = i915_gem_object_pin_map_unlocked(obj: scratch->obj, type: I915_MAP_WB);
	if (IS_ERR(ptr: cs)) {
		err = PTR_ERR(ptr: cs);
		goto err_rq;
	}

	/* A fresh context must never observe another context's GPRs */
	for (n = 0; n < NUM_GPR_DW; n++) {
		if (cs[n]) {
			pr_err("%s: GPR[%d].%s was not zero, found 0x%08x!\n",
			       engine->name,
			       n / 2, n & 1 ? "udw" : "ldw",
			       cs[n]);
			err = -EINVAL;
			break;
		}
	}

	i915_gem_object_unpin_map(obj: scratch->obj);

err_rq:
	/* Poison the semaphore so any stuck waiter is released */
	memset32(s: &slot[0], v: -1, n: 4);
	wmb();
	i915_request_put(rq);
err_put:
	intel_context_put(ce);
	return err;
}
696
static int live_lrc_gpr(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	struct i915_vma *scratch;
	enum intel_engine_id id;
	int err = 0;

	/*
	 * Check that GPR registers are cleared in new contexts as we need
	 * to avoid leaking any information from previous contexts.
	 */

	scratch = create_scratch(gt);
	if (IS_ERR(ptr: scratch))
		return PTR_ERR(ptr: scratch);

	for_each_engine(engine, gt, id) {
		st_engine_heartbeat_disable(engine);

		/* Once without and once with preemption of the readback */
		err = __live_lrc_gpr(engine, scratch, preempt: false);
		if (err)
			goto err;

		err = __live_lrc_gpr(engine, scratch, preempt: true);
		if (err)
			goto err;

/* note: in-loop label — both paths above fall through to per-engine cleanup */
err:
		st_engine_heartbeat_enable(engine);
		if (igt_flush_test(i915: gt->i915))
			err = -EIO;
		if (err)
			break;
	}

	i915_vma_unpin_and_release(p_vma: &scratch, flags: 0);
	return err;
}
736
/*
 * Build and submit a request on @ce that waits on the semaphore dword
 * @slot and then stores the engine's RING_CTX_TIMESTAMP register into
 * slot[@idx] of the status page. Returns the request with a reference
 * held, or an ERR_PTR.
 */
static struct i915_request *
create_timestamp(struct intel_context *ce, void *slot, int idx)
{
	/* GGTT address of @slot within the engine's status page */
	const u32 offset =
		i915_ggtt_offset(vma: ce->engine->status_page.vma) +
		offset_in_page(slot);
	struct i915_request *rq;
	u32 *cs;
	int err;

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq))
		return rq;

	cs = intel_ring_begin(rq, num_dwords: 10);
	if (IS_ERR(ptr: cs)) {
		err = PTR_ERR(ptr: cs);
		goto err;
	}

	/* Keep arbitration enabled so the wait may be preempted */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_NOOP;

	/* Poll until *offset != 0 */
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = offset;
	*cs++ = 0;

	/* Sample the context timestamp into slot[idx] */
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
	*cs++ = i915_mmio_reg_offset(RING_CTX_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = offset + idx * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	err = 0;
err:
	/* The request is submitted even on error so it is properly retired */
	i915_request_get(rq);
	i915_request_add(rq);
	if (err) {
		i915_request_put(rq);
		return ERR_PTR(error: err);
	}

	return rq;
}
786
/* Per-engine fixture for the CTX_TIMESTAMP save/restore checks. */
struct lrc_timestamp {
	struct intel_engine_cs *engine; /* engine under test */
	struct intel_context *ce[2];    /* [0] measured context, [1] preempting context */
	u32 poison;                     /* value planted in CTX_TIMESTAMP before restore */
};
792
793static bool timestamp_advanced(u32 start, u32 end)
794{
795 return (s32)(end - start) > 0;
796}
797
/*
 * Plant @arg->poison in ce[0]'s saved CTX_TIMESTAMP, run a request that
 * samples the live timestamp, then (optionally via preemption by ce[1])
 * switch away and verify the timestamp advanced monotonically across both
 * the restore (poison -> sampled) and the save (sampled -> saved image).
 */
static int __lrc_timestamp(const struct lrc_timestamp *arg, bool preempt)
{
	/* Semaphore + result slots carved out of the engine's status page */
	u32 *slot = memset32(s: arg->engine->status_page.addr + 1000, v: 0, n: 4);
	struct i915_request *rq;
	u32 timestamp;
	int err = 0;

	/* Poison the timestamp that will be restored when ce[0] runs */
	arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP] = arg->poison;
	rq = create_timestamp(ce: arg->ce[0], slot, idx: 1);
	if (IS_ERR(ptr: rq))
		return PTR_ERR(ptr: rq);

	err = wait_for_submit(engine: rq->engine, rq, HZ / 2);
	if (err)
		goto err;

	if (preempt) {
		/* Carry more poison in via the preempting context */
		arg->ce[1]->lrc_reg_state[CTX_TIMESTAMP] = 0xdeadbeef;
		err = emit_semaphore_signal(ce: arg->ce[1], slot);
		if (err)
			goto err;
	} else {
		/* Release the semaphore directly from the CPU */
		slot[0] = 1;
		wmb();
	}

	/* And wait for switch to kernel (to save our context to memory) */
	err = context_flush(ce: arg->ce[0], HZ / 2);
	if (err)
		goto err;

	/* Restore: sampled value must be later than the poison we planted */
	if (!timestamp_advanced(start: arg->poison, end: slot[1])) {
		pr_err("%s(%s): invalid timestamp on restore, context:%x, request:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       arg->poison, slot[1]);
		err = -EINVAL;
	}

	/* Save: the saved image must be later than the sampled value */
	timestamp = READ_ONCE(arg->ce[0]->lrc_reg_state[CTX_TIMESTAMP]);
	if (!timestamp_advanced(start: slot[1], end: timestamp)) {
		pr_err("%s(%s): invalid timestamp on save, request:%x, context:%x\n",
		       arg->engine->name, preempt ? "preempt" : "simple",
		       slot[1], timestamp);
		err = -EINVAL;
	}

err:
	/* Poison the semaphore so any stuck waiter is released */
	memset32(s: slot, v: -1, n: 4);
	i915_request_put(rq);
	return err;
}
849
850static int live_lrc_timestamp(void *arg)
851{
852 struct lrc_timestamp data = {};
853 struct intel_gt *gt = arg;
854 enum intel_engine_id id;
855 const u32 poison[] = {
856 0,
857 S32_MAX,
858 (u32)S32_MAX + 1,
859 U32_MAX,
860 };
861
862 /*
863 * This test was designed to isolate a hardware bug.
864 * The bug was found and fixed in future generations but
865 * now the test pollutes our CI on previous generation.
866 */
867 if (GRAPHICS_VER(gt->i915) == 12)
868 return 0;
869
870 /*
871 * We want to verify that the timestamp is saved and restore across
872 * context switches and is monotonic.
873 *
874 * So we do this with a little bit of LRC poisoning to check various
875 * boundary conditions, and see what happens if we preempt the context
876 * with a second request (carrying more poison into the timestamp).
877 */
878
879 for_each_engine(data.engine, gt, id) {
880 int i, err = 0;
881
882 st_engine_heartbeat_disable(engine: data.engine);
883
884 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
885 struct intel_context *tmp;
886
887 tmp = intel_context_create(engine: data.engine);
888 if (IS_ERR(ptr: tmp)) {
889 err = PTR_ERR(ptr: tmp);
890 goto err;
891 }
892
893 err = intel_context_pin(ce: tmp);
894 if (err) {
895 intel_context_put(ce: tmp);
896 goto err;
897 }
898
899 data.ce[i] = tmp;
900 }
901
902 for (i = 0; i < ARRAY_SIZE(poison); i++) {
903 data.poison = poison[i];
904
905 err = __lrc_timestamp(arg: &data, preempt: false);
906 if (err)
907 break;
908
909 err = __lrc_timestamp(arg: &data, preempt: true);
910 if (err)
911 break;
912 }
913
914err:
915 st_engine_heartbeat_enable(engine: data.engine);
916 for (i = 0; i < ARRAY_SIZE(data.ce); i++) {
917 if (!data.ce[i])
918 break;
919
920 intel_context_unpin(ce: data.ce[i]);
921 intel_context_put(ce: data.ce[i]);
922 }
923
924 if (igt_flush_test(i915: gt->i915))
925 err = -EIO;
926 if (err)
927 return err;
928 }
929
930 return 0;
931}
932
933static struct i915_vma *
934create_user_vma(struct i915_address_space *vm, unsigned long size)
935{
936 struct drm_i915_gem_object *obj;
937 struct i915_vma *vma;
938 int err;
939
940 obj = i915_gem_object_create_internal(i915: vm->i915, size);
941 if (IS_ERR(ptr: obj))
942 return ERR_CAST(ptr: obj);
943
944 vma = i915_vma_instance(obj, vm, NULL);
945 if (IS_ERR(ptr: vma)) {
946 i915_gem_object_put(obj);
947 return vma;
948 }
949
950 err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER);
951 if (err) {
952 i915_gem_object_put(obj);
953 return ERR_PTR(error: err);
954 }
955
956 return vma;
957}
958
959static u32 safe_poison(u32 offset, u32 poison)
960{
961 /*
962 * Do not enable predication as it will nop all subsequent commands,
963 * not only disabling the tests (by preventing all the other SRM) but
964 * also preventing the arbitration events at the end of the request.
965 */
966 if (offset == i915_mmio_reg_offset(RING_PREDICATE_RESULT(0)))
967 poison &= ~REG_BIT(0);
968
969 return poison;
970}
971
/*
 * Build a user batch that walks the LRI instructions in the engine's
 * default context image and emits one MI_STORE_REGISTER_MEM per register,
 * dumping the live value of every context register into @scratch.
 * Returns the batch vma or an ERR_PTR.
 */
static struct i915_vma *
store_context(struct intel_context *ce, struct i915_vma *scratch)
{
	struct i915_vma *batch;
	u32 dw, x, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(vm: ce->vm, SZ_64K);
	if (IS_ERR(ptr: batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(obj: batch->obj, type: I915_MAP_WC);
	if (IS_ERR(ptr: cs)) {
		i915_vma_put(vma: batch);
		return ERR_CAST(ptr: cs);
	}

	defaults = shmem_pin_map(file: ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(obj: batch->obj);
		i915_vma_put(vma: batch);
		return ERR_PTR(error: -ENOMEM);
	}

	x = 0;  /* byte offset into @scratch for the next stored register */
	dw = 0; /* dword cursor through the default context image */
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/*
		 * Keep it simple, skip parsing complex commands
		 *
		 * At present, there are no more MI_LOAD_REGISTER_IMM
		 * commands after the first 3D state command. Rather
		 * than include a table (see i915_cmd_parser.c) of all
		 * the possible commands and their instruction lengths
		 * (or mask for variable length instructions), assume
		 * we have gathered the complete list of registers and
		 * bail out.
		 */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			/* Assume all other MI commands match LRI length mask */
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(buf: defaults, PAGE_SIZE);
			break;
		}

		/* Emit one SRM per (reg, value) pair in this LRI payload */
		dw++;
		len = (len + 1) / 2;
		while (len--) {
			*cs++ = MI_STORE_REGISTER_MEM_GEN8;
			*cs++ = hw[dw];
			*cs++ = lower_32_bits(i915_vma_offset(scratch) + x);
			*cs++ = upper_32_bits(i915_vma_offset(scratch) + x);

			dw += 2;
			x += 4;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(file: ce->engine->default_state, ptr: defaults);

	i915_gem_object_flush_map(obj: batch->obj);
	i915_gem_object_unpin_map(obj: batch->obj);

	return batch;
}
1058
/*
 * Submit a request on @ce that dumps the context registers into @before,
 * blocks on the semaphore @sema (with arbitration enabled, so it may be
 * preempted while waiting), and then dumps the registers again into
 * @after. Comparing the two snapshots reveals any state disturbed across
 * the preemption. Returns the request with a reference held, or ERR_PTR.
 */
static struct i915_request *
record_registers(struct intel_context *ce,
		 struct i915_vma *before,
		 struct i915_vma *after,
		 u32 *sema)
{
	struct i915_vma *b_before, *b_after;
	struct i915_request *rq;
	u32 *cs;
	int err;

	/* Two register-dump batches, one per snapshot buffer */
	b_before = store_context(ce, scratch: before);
	if (IS_ERR(ptr: b_before))
		return ERR_CAST(ptr: b_before);

	b_after = store_context(ce, scratch: after);
	if (IS_ERR(ptr: b_after)) {
		rq = ERR_CAST(ptr: b_after);
		goto err_before;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq))
		goto err_after;

	err = igt_vma_move_to_active_unlocked(vma: before, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = igt_vma_move_to_active_unlocked(vma: b_before, rq, flags: 0);
	if (err)
		goto err_rq;

	err = igt_vma_move_to_active_unlocked(vma: after, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto err_rq;

	err = igt_vma_move_to_active_unlocked(vma: b_after, rq, flags: 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, num_dwords: 14);
	if (IS_ERR(ptr: cs)) {
		err = PTR_ERR(ptr: cs);
		goto err_rq;
	}

	/* First snapshot, with arbitration disabled so it runs atomically */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(i915_vma_offset(b_before));
	*cs++ = upper_32_bits(i915_vma_offset(b_before));

	/* Re-enable arbitration and poll @sema — the preemption window */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
	*cs++ = MI_SEMAPHORE_WAIT |
		MI_SEMAPHORE_GLOBAL_GTT |
		MI_SEMAPHORE_POLL |
		MI_SEMAPHORE_SAD_NEQ_SDD;
	*cs++ = 0;
	*cs++ = i915_ggtt_offset(vma: ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = MI_NOOP;

	/* Second snapshot, again with arbitration disabled */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(i915_vma_offset(b_after));
	*cs++ = upper_32_bits(i915_vma_offset(b_after));

	intel_ring_advance(rq, cs);

	/* Arm the semaphore before letting the request run */
	WRITE_ONCE(*sema, 0);
	i915_request_get(rq);
	i915_request_add(rq);
err_after:
	i915_vma_put(vma: b_after);
err_before:
	i915_vma_put(vma: b_before);
	return rq;

err_rq:
	i915_request_add(rq);
	rq = ERR_PTR(error: err);
	goto err_after;
}
1143
/*
 * Build a user batch that mirrors the LRI instructions in the engine's
 * default context image, loading @poison (sanitised per register by
 * safe_poison()) into every context register. Returns the batch vma or
 * an ERR_PTR.
 */
static struct i915_vma *load_context(struct intel_context *ce, u32 poison)
{
	struct i915_vma *batch;
	u32 dw, *cs, *hw;
	u32 *defaults;

	batch = create_user_vma(vm: ce->vm, SZ_64K);
	if (IS_ERR(ptr: batch))
		return batch;

	cs = i915_gem_object_pin_map_unlocked(obj: batch->obj, type: I915_MAP_WC);
	if (IS_ERR(ptr: cs)) {
		i915_vma_put(vma: batch);
		return ERR_CAST(ptr: cs);
	}

	defaults = shmem_pin_map(file: ce->engine->default_state);
	if (!defaults) {
		i915_gem_object_unpin_map(obj: batch->obj);
		i915_vma_put(vma: batch);
		return ERR_PTR(error: -ENOMEM);
	}

	dw = 0; /* dword cursor through the default context image */
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			dw++;
			continue;
		}

		/* Assume non-LRI MI commands match the LRI length mask */
		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       ce->engine->name);
			igt_hexdump(buf: defaults, PAGE_SIZE);
			break;
		}

		/* Re-emit the LRI, substituting poison for each value */
		dw++;
		len = (len + 1) / 2;
		*cs++ = MI_LOAD_REGISTER_IMM(len);
		while (len--) {
			*cs++ = hw[dw];
			*cs++ = safe_poison(offset: hw[dw] & get_lri_mask(engine: ce->engine,
								       MI_LRI_LRM_CS_MMIO),
					    poison);
			dw += 2;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	*cs++ = MI_BATCH_BUFFER_END;

	shmem_unpin_map(file: ce->engine->default_state, ptr: defaults);

	i915_gem_object_flush_map(obj: batch->obj);
	i915_gem_object_unpin_map(obj: batch->obj);

	return batch;
}
1216
/*
 * Submit a request on @ce that runs the poison batch (overwriting every
 * context register with @poison) and then writes 1 into the semaphore
 * @sema, releasing the victim context waiting on it. Returns 0 or -errno.
 */
static int poison_registers(struct intel_context *ce, u32 poison, u32 *sema)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	u32 *cs;
	int err;

	batch = load_context(ce, poison);
	if (IS_ERR(ptr: batch))
		return PTR_ERR(ptr: batch);

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq)) {
		err = PTR_ERR(ptr: rq);
		goto err_batch;
	}

	err = igt_vma_move_to_active_unlocked(vma: batch, rq, flags: 0);
	if (err)
		goto err_rq;

	cs = intel_ring_begin(rq, num_dwords: 8);
	if (IS_ERR(ptr: cs)) {
		err = PTR_ERR(ptr: cs);
		goto err_rq;
	}

	/* Run the poison batch atomically (no preemption mid-batch) */
	*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
	*cs++ = MI_BATCH_BUFFER_START_GEN8 | BIT(8);
	*cs++ = lower_32_bits(i915_vma_offset(batch));
	*cs++ = upper_32_bits(i915_vma_offset(batch));

	/* Then release the victim by writing 1 into @sema */
	*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
	*cs++ = i915_ggtt_offset(vma: ce->engine->status_page.vma) +
		offset_in_page(sema);
	*cs++ = 0;
	*cs++ = 1;

	intel_ring_advance(rq, cs);

	/* Ensure the poisoner preempts anything else on the engine */
	rq->sched.attr.priority = I915_PRIORITY_BARRIER;
err_rq:
	i915_request_add(rq);
err_batch:
	i915_vma_put(vma: batch);
	return err;
}
1264
1265static bool is_moving(u32 a, u32 b)
1266{
1267 return a != b;
1268}
1269
/*
 * Compare the register values captured into @ref (recorded with no rival
 * context running) against those captured into @result (recorded while a
 * rival context sprayed @poison), using the LRI lists in the engine's
 * default context image to enumerate which registers were sampled.
 *
 * Returns 0 if every stable register matched, -EINVAL on a mismatch, or
 * a negative error code if any of the backing objects could not be mapped.
 */
static int compare_isolation(struct intel_engine_cs *engine,
			     struct i915_vma *ref[2],
			     struct i915_vma *result[2],
			     struct intel_context *ce,
			     u32 poison)
{
	u32 x, dw, *hw, *lrc;
	u32 *A[2], *B[2]; /* A: reference captures, B: captures under attack */
	u32 *defaults;
	int err = 0;

	/* Map the two reference captures (before/after, undisturbed run) */
	A[0] = i915_gem_object_pin_map_unlocked(obj: ref[0]->obj, type: I915_MAP_WC);
	if (IS_ERR(ptr: A[0]))
		return PTR_ERR(ptr: A[0]);

	A[1] = i915_gem_object_pin_map_unlocked(obj: ref[1]->obj, type: I915_MAP_WC);
	if (IS_ERR(ptr: A[1])) {
		err = PTR_ERR(ptr: A[1]);
		goto err_A0;
	}

	/* Map the two captures taken while the rival context ran */
	B[0] = i915_gem_object_pin_map_unlocked(obj: result[0]->obj, type: I915_MAP_WC);
	if (IS_ERR(ptr: B[0])) {
		err = PTR_ERR(ptr: B[0]);
		goto err_A1;
	}

	B[1] = i915_gem_object_pin_map_unlocked(obj: result[1]->obj, type: I915_MAP_WC);
	if (IS_ERR(ptr: B[1])) {
		err = PTR_ERR(ptr: B[1]);
		goto err_B0;
	}

	/* Map the context image itself, for reporting its view on mismatch */
	lrc = i915_gem_object_pin_map_unlocked(obj: ce->state->obj,
					       type: intel_gt_coherent_map_type(gt: engine->gt,
									  obj: ce->state->obj,
									  always_coherent: false));
	if (IS_ERR(ptr: lrc)) {
		err = PTR_ERR(ptr: lrc);
		goto err_B1;
	}
	lrc += LRC_STATE_OFFSET / sizeof(*hw);

	defaults = shmem_pin_map(file: ce->engine->default_state);
	if (!defaults) {
		err = -ENOMEM;
		goto err_lrc;
	}

	/*
	 * Walk the default context image: x indexes the captured samples,
	 * dw indexes dwords within the register-state page.
	 */
	x = 0;
	dw = 0;
	hw = defaults;
	hw += LRC_STATE_OFFSET / sizeof(*hw);
	do {
		u32 len = hw[dw] & LRI_LENGTH_MASK;

		/* For simplicity, break parsing at the first complex command */
		if ((hw[dw] >> INSTR_CLIENT_SHIFT) != INSTR_MI_CLIENT)
			break;

		if (hw[dw] == 0) {
			/* zero dword: padding, skip it */
			dw++;
			continue;
		}

		if ((hw[dw] & GENMASK(31, 23)) != LRI_HEADER) {
			/* not an LRI: skip the command and its payload */
			dw += len + 2;
			continue;
		}

		if (!len) {
			pr_err("%s: invalid LRI found in context image\n",
			       engine->name);
			igt_hexdump(buf: defaults, PAGE_SIZE);
			break;
		}

		/* LRI payload is (reg, value) pairs; len dwords -> len/2 regs */
		dw++;
		len = (len + 1) / 2;
		while (len--) {
			/* Only compare registers that were stable between the
			 * two reference reads; ignore self-updating ones. */
			if (!is_moving(a: A[0][x], b: A[1][x]) &&
			    (A[0][x] != B[0][x] || A[1][x] != B[1][x])) {
				switch (hw[dw] & 4095) {
				case 0x30: /* RING_HEAD */
				case 0x34: /* RING_TAIL */
					/* expected to change as the ring runs */
					break;

				default:
					pr_err("%s[%d]: Mismatch for register %4x, default %08x, reference %08x, result (%08x, %08x), poison %08x, context %08x\n",
					       engine->name, dw,
					       hw[dw], hw[dw + 1],
					       A[0][x], B[0][x], B[1][x],
					       poison, lrc[dw + 1]);
					err = -EINVAL;
				}
			}
			dw += 2;
			x++;
		}
	} while (dw < PAGE_SIZE / sizeof(u32) &&
		 (hw[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END);

	shmem_unpin_map(file: ce->engine->default_state, ptr: defaults);
err_lrc:
	i915_gem_object_unpin_map(obj: ce->state->obj);
err_B1:
	i915_gem_object_unpin_map(obj: result[1]->obj);
err_B0:
	i915_gem_object_unpin_map(obj: result[0]->obj);
err_A1:
	i915_gem_object_unpin_map(obj: ref[1]->obj);
err_A0:
	i915_gem_object_unpin_map(obj: ref[0]->obj);
	return err;
}
1385
1386static struct i915_vma *
1387create_result_vma(struct i915_address_space *vm, unsigned long sz)
1388{
1389 struct i915_vma *vma;
1390 void *ptr;
1391
1392 vma = create_user_vma(vm, size: sz);
1393 if (IS_ERR(ptr: vma))
1394 return vma;
1395
1396 /* Set the results to a known value distinct from the poison */
1397 ptr = i915_gem_object_pin_map_unlocked(obj: vma->obj, type: I915_MAP_WC);
1398 if (IS_ERR(ptr)) {
1399 i915_vma_put(vma);
1400 return ERR_CAST(ptr);
1401 }
1402
1403 memset(ptr, POISON_INUSE, vma->size);
1404 i915_gem_object_flush_map(obj: vma->obj);
1405 i915_gem_object_unpin_map(obj: vma->obj);
1406
1407 return vma;
1408}
1409
/*
 * Verify that context A's register state is isolated from context B:
 * record A's registers undisturbed into @ref, then record them again into
 * @result while B runs poison_registers() with @poison, and compare.
 * The shared semaphore in the engine's status page gates the recording
 * batches; it is always cancelled (set to -1) before returning.
 */
static int __lrc_isolation(struct intel_engine_cs *engine, u32 poison)
{
	u32 *sema = memset32(s: engine->status_page.addr + 1000, v: 0, n: 1);
	struct i915_vma *ref[2], *result[2];
	struct intel_context *A, *B;
	struct i915_request *rq;
	int err;

	A = intel_context_create(engine);
	if (IS_ERR(ptr: A))
		return PTR_ERR(ptr: A);

	B = intel_context_create(engine);
	if (IS_ERR(ptr: B)) {
		err = PTR_ERR(ptr: B);
		goto err_A;
	}

	ref[0] = create_result_vma(vm: A->vm, SZ_64K);
	if (IS_ERR(ptr: ref[0])) {
		err = PTR_ERR(ptr: ref[0]);
		goto err_B;
	}

	ref[1] = create_result_vma(vm: A->vm, SZ_64K);
	if (IS_ERR(ptr: ref[1])) {
		err = PTR_ERR(ptr: ref[1]);
		goto err_ref0;
	}

	/* First pass: record A's registers with nothing else running */
	rq = record_registers(ce: A, before: ref[0], after: ref[1], sema);
	if (IS_ERR(ptr: rq)) {
		err = PTR_ERR(ptr: rq);
		goto err_ref1;
	}

	/* Release the recording batch immediately; no rival this time */
	WRITE_ONCE(*sema, 1);
	wmb();

	if (i915_request_wait(rq, flags: 0, HZ / 2) < 0) {
		i915_request_put(rq);
		err = -ETIME;
		goto err_ref1;
	}
	i915_request_put(rq);

	result[0] = create_result_vma(vm: A->vm, SZ_64K);
	if (IS_ERR(ptr: result[0])) {
		err = PTR_ERR(ptr: result[0]);
		goto err_ref1;
	}

	result[1] = create_result_vma(vm: A->vm, SZ_64K);
	if (IS_ERR(ptr: result[1])) {
		err = PTR_ERR(ptr: result[1]);
		goto err_result0;
	}

	/* Second pass: record again, this time with B poisoning registers */
	rq = record_registers(ce: A, before: result[0], after: result[1], sema);
	if (IS_ERR(ptr: rq)) {
		err = PTR_ERR(ptr: rq);
		goto err_result1;
	}

	err = poison_registers(ce: B, poison, sema);
	if (err == 0 && i915_request_wait(rq, flags: 0, HZ / 2) < 0) {
		pr_err("%s(%s): wait for results timed out\n",
		       __func__, engine->name);
		err = -ETIME;
	}

	/* Always cancel the semaphore wait, just in case the GPU gets stuck */
	WRITE_ONCE(*sema, -1);
	i915_request_put(rq);
	if (err)
		goto err_result1;

	err = compare_isolation(engine, ref, result, ce: A, poison);

err_result1:
	i915_vma_put(vma: result[1]);
err_result0:
	i915_vma_put(vma: result[0]);
err_ref1:
	i915_vma_put(vma: ref[1]);
err_ref0:
	i915_vma_put(vma: ref[0]);
err_B:
	intel_context_put(ce: B);
err_A:
	intel_context_put(ce: A);
	return err;
}
1503
1504static bool skip_isolation(const struct intel_engine_cs *engine)
1505{
1506 if (engine->class == COPY_ENGINE_CLASS && GRAPHICS_VER(engine->i915) == 9)
1507 return true;
1508
1509 if (engine->class == RENDER_CLASS && GRAPHICS_VER(engine->i915) == 11)
1510 return true;
1511
1512 return false;
1513}
1514
static int live_lrc_isolation(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	/* Assorted poison patterns; each is also tried inverted below */
	const u32 poison[] = {
		STACK_MAGIC,
		0x3a3a3a3a,
		0x5c5c5c5c,
		0xffffffff,
		0xffff0000,
	};
	int err = 0;

	/*
	 * Our goal is try and verify that per-context state cannot be
	 * tampered with by another non-privileged client.
	 *
	 * We take the list of context registers from the LRI in the default
	 * context image and attempt to modify that list from a remote context.
	 */

	for_each_engine(engine, gt, id) {
		int i;

		/* Just don't even ask */
		if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN) &&
		    skip_isolation(engine))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < ARRAY_SIZE(poison); i++) {
			int result;

			result = __lrc_isolation(engine, poison: poison[i]);
			if (result && !err) /* keep only the first error */
				err = result;

			result = __lrc_isolation(engine, poison: ~poison[i]);
			if (result && !err)
				err = result;
		}
		intel_engine_pm_put(engine);
		if (igt_flush_test(i915: gt->i915)) {
			err = -EIO;
			break;
		}
	}

	return err;
}
1566
1567static int wabb_ctx_submit_req(struct intel_context *ce)
1568{
1569 struct i915_request *rq;
1570 int err = 0;
1571
1572 rq = intel_context_create_request(ce);
1573 if (IS_ERR(ptr: rq))
1574 return PTR_ERR(ptr: rq);
1575
1576 i915_request_get(rq);
1577 i915_request_add(rq);
1578
1579 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0)
1580 err = -ETIME;
1581
1582 i915_request_put(rq);
1583
1584 return err;
1585}
1586
1587#define CTX_BB_CANARY_OFFSET (3 * 1024)
1588#define CTX_BB_CANARY_INDEX (CTX_BB_CANARY_OFFSET / sizeof(u32))
1589
/*
 * Emit an MI_STORE_REGISTER_MEM that copies RING_START into the canary
 * slot of this context's wa_bb area; the per-ctx variant's canary lives
 * one page beyond the indirect-ctx one.
 */
static u32 *
emit_wabb_ctx_canary(const struct intel_context *ce,
		     u32 *cs, bool per_ctx)
{
	*cs++ = MI_STORE_REGISTER_MEM_GEN8 |
		MI_SRM_LRM_GLOBAL_GTT |
		MI_LRI_LRM_CS_MMIO;
	*cs++ = i915_mmio_reg_offset(RING_START(0));
	/* Destination: canary offset within the context's wa_bb page(s) */
	*cs++ = i915_ggtt_offset(vma: ce->state) +
		context_wa_bb_offset(ce) +
		CTX_BB_CANARY_OFFSET +
		(per_ctx ? PAGE_SIZE : 0);
	*cs++ = 0;

	return cs;
}
1606
1607static u32 *
1608emit_indirect_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1609{
1610 return emit_wabb_ctx_canary(ce, cs, per_ctx: false);
1611}
1612
1613static u32 *
1614emit_per_ctx_bb_canary(const struct intel_context *ce, u32 *cs)
1615{
1616 return emit_wabb_ctx_canary(ce, cs, per_ctx: true);
1617}
1618
1619static void
1620wabb_ctx_setup(struct intel_context *ce, bool per_ctx)
1621{
1622 u32 *cs = context_wabb(ce, per_ctx);
1623
1624 cs[CTX_BB_CANARY_INDEX] = 0xdeadf00d;
1625
1626 if (per_ctx)
1627 setup_per_ctx_bb(ce, engine: ce->engine, emit: emit_per_ctx_bb_canary);
1628 else
1629 setup_indirect_ctx_bb(ce, engine: ce->engine, emit: emit_indirect_ctx_bb_canary);
1630}
1631
1632static bool check_ring_start(struct intel_context *ce, bool per_ctx)
1633{
1634 const u32 * const ctx_bb = (void *)(ce->lrc_reg_state) -
1635 LRC_STATE_OFFSET + context_wa_bb_offset(ce) +
1636 (per_ctx ? PAGE_SIZE : 0);
1637
1638 if (ctx_bb[CTX_BB_CANARY_INDEX] == ce->lrc_reg_state[CTX_RING_START])
1639 return true;
1640
1641 pr_err("ring start mismatch: canary 0x%08x vs state 0x%08x\n",
1642 ctx_bb[CTX_BB_CANARY_INDEX],
1643 ce->lrc_reg_state[CTX_RING_START]);
1644
1645 return false;
1646}
1647
1648static int wabb_ctx_check(struct intel_context *ce, bool per_ctx)
1649{
1650 int err;
1651
1652 err = wabb_ctx_submit_req(ce);
1653 if (err)
1654 return err;
1655
1656 if (!check_ring_start(ce, per_ctx))
1657 return -EINVAL;
1658
1659 return 0;
1660}
1661
/*
 * Create and pin two contexts on @engine, install the canary workaround
 * batch in both, and check each one independently — proving the wa_bb is
 * genuinely per-context and runs during context restore.
 */
static int __lrc_wabb_ctx(struct intel_engine_cs *engine, bool per_ctx)
{
	struct intel_context *a, *b;
	int err;

	a = intel_context_create(engine);
	if (IS_ERR(ptr: a))
		return PTR_ERR(ptr: a);
	err = intel_context_pin(ce: a);
	if (err)
		goto put_a;

	b = intel_context_create(engine);
	if (IS_ERR(ptr: b)) {
		err = PTR_ERR(ptr: b);
		goto unpin_a;
	}
	err = intel_context_pin(ce: b);
	if (err)
		goto put_b;

	/* We use the already reserved extra page in context state */
	if (!a->wa_bb_page) {
		/* No wa_bb page: nothing to test on this platform */
		GEM_BUG_ON(b->wa_bb_page);
		GEM_BUG_ON(GRAPHICS_VER(engine->i915) == 12);
		goto unpin_b;
	}

	/*
	 * In order to test that our per context bb is truly per context,
	 * and executes at the intended spot on context restoring process,
	 * make the batch store the ring start value to memory.
	 * As ring start is restored apriori of starting the indirect ctx bb and
	 * as it will be different for each context, it fits to this purpose.
	 */
	wabb_ctx_setup(ce: a, per_ctx);
	wabb_ctx_setup(ce: b, per_ctx);

	err = wabb_ctx_check(ce: a, per_ctx);
	if (err)
		goto unpin_b;

	err = wabb_ctx_check(ce: b, per_ctx);

unpin_b:
	intel_context_unpin(ce: b);
put_b:
	intel_context_put(ce: b);
unpin_a:
	intel_context_unpin(ce: a);
put_a:
	intel_context_put(ce: a);

	return err;
}
1717
1718static int lrc_wabb_ctx(void *arg, bool per_ctx)
1719{
1720 struct intel_gt *gt = arg;
1721 struct intel_engine_cs *engine;
1722 enum intel_engine_id id;
1723 int err = 0;
1724
1725 for_each_engine(engine, gt, id) {
1726 intel_engine_pm_get(engine);
1727 err = __lrc_wabb_ctx(engine, per_ctx);
1728 intel_engine_pm_put(engine);
1729
1730 if (igt_flush_test(i915: gt->i915))
1731 err = -EIO;
1732
1733 if (err)
1734 break;
1735 }
1736
1737 return err;
1738}
1739
1740static int live_lrc_indirect_ctx_bb(void *arg)
1741{
1742 return lrc_wabb_ctx(arg, per_ctx: false);
1743}
1744
1745static int live_lrc_per_ctx_bb(void *arg)
1746{
1747 return lrc_wabb_ctx(arg, per_ctx: true);
1748}
1749
/*
 * Perform an engine reset underneath the submission tasklet, on behalf of
 * the (corrupted) context carried by @rq.  The reset is skipped if the
 * request has already been flagged in error, or if another party already
 * owns the engine-reset flag.
 */
static void garbage_reset(struct intel_engine_cs *engine,
			  struct i915_request *rq)
{
	const unsigned int bit = I915_RESET_ENGINE + engine->id;
	unsigned long *lock = &engine->gt->reset.flags;

	local_bh_disable();
	if (!test_and_set_bit(nr: bit, addr: lock)) { /* claim the reset flag */
		/* Keep the tasklet quiet while we reset behind its back */
		tasklet_disable(t: &engine->sched_engine->tasklet);

		if (!rq->fence.error)
			__intel_engine_reset_bh(engine, NULL);

		tasklet_enable(t: &engine->sched_engine->tasklet);
		clear_and_wake_up_bit(bit, word: lock);
	}
	local_bh_enable();
}
1768
/*
 * Scribble random bytes over @ce's register state and submit a request on
 * the corrupted context.  Returns the request with a reference held, or an
 * ERR_PTR.  The context is left pinned on success; the pin is only dropped
 * here on the failure path.
 */
static struct i915_request *garbage(struct intel_context *ce,
				    struct rnd_state *prng)
{
	struct i915_request *rq;
	int err;

	/* Pin first so lrc_reg_state is mapped and stays resident */
	err = intel_context_pin(ce);
	if (err)
		return ERR_PTR(error: err);

	/* Overwrite the whole register-state area with pseudo-random junk */
	prandom_bytes_state(state: prng,
			    buf: ce->lrc_reg_state,
			    nbytes: ce->engine->context_size -
			    LRC_STATE_OFFSET);

	rq = intel_context_create_request(ce);
	if (IS_ERR(ptr: rq)) {
		err = PTR_ERR(ptr: rq);
		goto err_unpin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	return rq;

err_unpin:
	intel_context_unpin(ce);
	return ERR_PTR(error: err);
}
1798
1799static int __lrc_garbage(struct intel_engine_cs *engine, struct rnd_state *prng)
1800{
1801 struct intel_context *ce;
1802 struct i915_request *hang;
1803 int err = 0;
1804
1805 ce = intel_context_create(engine);
1806 if (IS_ERR(ptr: ce))
1807 return PTR_ERR(ptr: ce);
1808
1809 hang = garbage(ce, prng);
1810 if (IS_ERR(ptr: hang)) {
1811 err = PTR_ERR(ptr: hang);
1812 goto err_ce;
1813 }
1814
1815 if (wait_for_submit(engine, rq: hang, HZ / 2)) {
1816 i915_request_put(rq: hang);
1817 err = -ETIME;
1818 goto err_ce;
1819 }
1820
1821 intel_context_set_banned(ce);
1822 garbage_reset(engine, rq: hang);
1823
1824 intel_engine_flush_submission(engine);
1825 if (!hang->fence.error) {
1826 i915_request_put(rq: hang);
1827 pr_err("%s: corrupted context was not reset\n",
1828 engine->name);
1829 err = -EINVAL;
1830 goto err_ce;
1831 }
1832
1833 if (i915_request_wait(rq: hang, flags: 0, HZ / 2) < 0) {
1834 pr_err("%s: corrupted context did not recover\n",
1835 engine->name);
1836 i915_request_put(rq: hang);
1837 err = -EIO;
1838 goto err_ce;
1839 }
1840 i915_request_put(rq: hang);
1841
1842err_ce:
1843 intel_context_put(ce);
1844 return err;
1845}
1846
static int live_lrc_garbage(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Verify that we can recover if one context state is completely
	 * corrupted.
	 */

	/* Deliberately hangs the GPU; only run when BROKEN tests are enabled */
	if (!IS_ENABLED(CONFIG_DRM_I915_SELFTEST_BROKEN))
		return 0;

	for_each_engine(engine, gt, id) {
		I915_RND_STATE(prng);
		int err = 0, i;

		/* Recovery depends on per-engine reset support */
		if (!intel_has_reset_engine(gt: engine->gt))
			continue;

		intel_engine_pm_get(engine);
		for (i = 0; i < 3; i++) { /* a few rounds of fresh garbage */
			err = __lrc_garbage(engine, prng: &prng);
			if (err)
				break;
		}
		intel_engine_pm_put(engine);

		if (igt_flush_test(i915: gt->i915))
			err = -EIO;
		if (err)
			return err;
	}

	return 0;
}
1884
/*
 * Hammer @engine with batches of 1024 requests on one context until the
 * selftest timeout expires, then check the context's pphwsp runtime
 * accounting never went backwards (no underflows recorded).
 */
static int __live_pphwsp_runtime(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	IGT_TIMEOUT(end_time);
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ptr: ce))
		return PTR_ERR(ptr: ce);

	/* Reset the underflow counters before we start measuring */
	ce->stats.runtime.num_underflow = 0;
	ce->stats.runtime.max_underflow = 0;

	do {
		unsigned int loop = 1024;

		while (loop) {
			rq = intel_context_create_request(ce);
			if (IS_ERR(ptr: rq)) {
				err = PTR_ERR(ptr: rq);
				goto err_rq;
			}

			/* Keep a reference only on the last request of the batch */
			if (--loop == 0)
				i915_request_get(rq);

			i915_request_add(rq);
		}

		if (__igt_timeout(timeout: end_time, NULL))
			break;

		/* Batch fully queued and we continue: drop the held reference */
		i915_request_put(rq);
	} while (1);

	/* Wait for the final (still referenced) request to complete */
	err = i915_request_wait(rq, flags: 0, HZ / 5);
	if (err < 0) {
		pr_err("%s: request not completed!\n", engine->name);
		goto err_wait;
	}

	igt_flush_test(i915: engine->i915);

	pr_info("%s: pphwsp runtime %lluns, average %lluns\n",
		engine->name,
		intel_context_get_total_runtime_ns(ce),
		intel_context_get_avg_runtime_ns(ce));

	err = 0;
	if (ce->stats.runtime.num_underflow) {
		pr_err("%s: pphwsp underflow %u time(s), max %u cycles!\n",
		       engine->name,
		       ce->stats.runtime.num_underflow,
		       ce->stats.runtime.max_underflow);
		GEM_TRACE_DUMP();
		err = -EOVERFLOW;
	}

err_wait:
	i915_request_put(rq);
err_rq:
	intel_context_put(ce);
	return err;
}
1950
1951static int live_pphwsp_runtime(void *arg)
1952{
1953 struct intel_gt *gt = arg;
1954 struct intel_engine_cs *engine;
1955 enum intel_engine_id id;
1956 int err = 0;
1957
1958 /*
1959 * Check that cumulative context runtime as stored in the pphwsp[16]
1960 * is monotonic.
1961 */
1962
1963 for_each_engine(engine, gt, id) {
1964 err = __live_pphwsp_runtime(engine);
1965 if (err)
1966 break;
1967 }
1968
1969 if (igt_flush_test(i915: gt->i915))
1970 err = -EIO;
1971
1972 return err;
1973}
1974
/*
 * Entry point for the LRC live selftests: runs each subtest on every GT,
 * provided the hardware supports logical ring contexts.
 */
int intel_lrc_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_lrc_layout),
		SUBTEST(live_lrc_fixed),
		SUBTEST(live_lrc_state),
		SUBTEST(live_lrc_gpr),
		SUBTEST(live_lrc_isolation),
		SUBTEST(live_lrc_timestamp),
		SUBTEST(live_lrc_garbage),
		SUBTEST(live_pphwsp_runtime),
		SUBTEST(live_lrc_indirect_ctx_bb),
		SUBTEST(live_lrc_per_ctx_bb),
	};

	/* Nothing to test without logical ring contexts (execlists) */
	if (!HAS_LOGICAL_RING_CONTEXTS(i915))
		return 0;

	return intel_gt_live_subtests(tests, to_gt(i915));
}
1995

/* source code of linux/drivers/gpu/drm/i915/gt/selftest_lrc.c */