1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include <linux/pm_qos.h>
26#include <linux/prime_numbers.h>
27#include <linux/sort.h>
28
29#include <drm/drm_print.h>
30
31#include "gem/i915_gem_internal.h"
32#include "gem/i915_gem_pm.h"
33#include "gem/selftests/mock_context.h"
34#include "gt/intel_engine_heartbeat.h"
35#include "gt/intel_engine_pm.h"
36#include "gt/intel_engine_user.h"
37#include "gt/intel_gt.h"
38#include "gt/intel_gt_clock_utils.h"
39#include "gt/intel_gt_requests.h"
40#include "gt/selftest_engine_heartbeat.h"
41
42#include "i915_random.h"
43#include "i915_selftest.h"
44#include "i915_wait_util.h"
45#include "igt_flush_test.h"
46#include "igt_live_test.h"
47#include "igt_spinner.h"
48#include "lib_sw_fence.h"
49#include "mock_drm.h"
50#include "mock_gem_device.h"
51
52static unsigned int num_uabi_engines(struct drm_i915_private *i915)
53{
54 struct intel_engine_cs *engine;
55 unsigned int count;
56
57 count = 0;
58 for_each_uabi_engine(engine, i915)
59 count++;
60
61 return count;
62}
63
64static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
65{
66 return intel_engine_lookup_user(i915, class: I915_ENGINE_CLASS_RENDER, instance: 0);
67}
68
69static int igt_add_request(void *arg)
70{
71 struct drm_i915_private *i915 = arg;
72 struct i915_request *request;
73
74 /* Basic preliminary test to create a request and let it loose! */
75
76 request = mock_request(ce: rcs0(i915)->kernel_context, HZ / 10);
77 if (IS_ERR(ptr: request))
78 return PTR_ERR(ptr: request);
79
80 i915_request_add(rq: request);
81
82 return 0;
83}
84
85static int igt_wait_request(void *arg)
86{
87 const long T = HZ / 4;
88 struct drm_i915_private *i915 = arg;
89 struct i915_request *request;
90 int err = -EINVAL;
91
92 /* Submit a request, then wait upon it */
93
94 request = mock_request(ce: rcs0(i915)->kernel_context, delay: T);
95 if (IS_ERR(ptr: request))
96 return PTR_ERR(ptr: request);
97
98 i915_request_get(rq: request);
99
100 if (i915_request_wait(rq: request, flags: 0, timeout: 0) != -ETIME) {
101 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
102 goto out_request;
103 }
104
105 if (i915_request_wait(rq: request, flags: 0, timeout: T) != -ETIME) {
106 pr_err("request wait succeeded (expected timeout before submit!)\n");
107 goto out_request;
108 }
109
110 if (i915_request_completed(rq: request)) {
111 pr_err("request completed before submit!!\n");
112 goto out_request;
113 }
114
115 i915_request_add(rq: request);
116
117 if (i915_request_wait(rq: request, flags: 0, timeout: 0) != -ETIME) {
118 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
119 goto out_request;
120 }
121
122 if (i915_request_completed(rq: request)) {
123 pr_err("request completed immediately!\n");
124 goto out_request;
125 }
126
127 if (i915_request_wait(rq: request, flags: 0, timeout: T / 2) != -ETIME) {
128 pr_err("request wait succeeded (expected timeout!)\n");
129 goto out_request;
130 }
131
132 if (i915_request_wait(rq: request, flags: 0, timeout: T) == -ETIME) {
133 pr_err("request wait timed out!\n");
134 goto out_request;
135 }
136
137 if (!i915_request_completed(rq: request)) {
138 pr_err("request not complete after waiting!\n");
139 goto out_request;
140 }
141
142 if (i915_request_wait(rq: request, flags: 0, timeout: T) == -ETIME) {
143 pr_err("request wait timed out when already complete!\n");
144 goto out_request;
145 }
146
147 err = 0;
148out_request:
149 i915_request_put(rq: request);
150 mock_device_flush(i915);
151 return err;
152}
153
154static int igt_fence_wait(void *arg)
155{
156 const long T = HZ / 4;
157 struct drm_i915_private *i915 = arg;
158 struct i915_request *request;
159 int err = -EINVAL;
160
161 /* Submit a request, treat it as a fence and wait upon it */
162
163 request = mock_request(ce: rcs0(i915)->kernel_context, delay: T);
164 if (IS_ERR(ptr: request))
165 return PTR_ERR(ptr: request);
166
167 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T) != -ETIME) {
168 pr_err("fence wait success before submit (expected timeout)!\n");
169 goto out;
170 }
171
172 i915_request_add(rq: request);
173
174 if (dma_fence_is_signaled(fence: &request->fence)) {
175 pr_err("fence signaled immediately!\n");
176 goto out;
177 }
178
179 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T / 2) != -ETIME) {
180 pr_err("fence wait success after submit (expected timeout)!\n");
181 goto out;
182 }
183
184 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T) <= 0) {
185 pr_err("fence wait timed out (expected success)!\n");
186 goto out;
187 }
188
189 if (!dma_fence_is_signaled(fence: &request->fence)) {
190 pr_err("fence unsignaled after waiting!\n");
191 goto out;
192 }
193
194 if (dma_fence_wait_timeout(&request->fence, intr: false, timeout: T) <= 0) {
195 pr_err("fence wait timed out when complete (expected success)!\n");
196 goto out;
197 }
198
199 err = 0;
200out:
201 mock_device_flush(i915);
202 return err;
203}
204
205static int igt_request_rewind(void *arg)
206{
207 struct drm_i915_private *i915 = arg;
208 struct i915_request *request, *vip;
209 struct i915_gem_context *ctx[2];
210 struct intel_context *ce;
211 int err = -EINVAL;
212
213 ctx[0] = mock_context(i915, name: "A");
214 if (!ctx[0]) {
215 err = -ENOMEM;
216 goto err_ctx_0;
217 }
218
219 ce = i915_gem_context_get_engine(ctx: ctx[0], idx: RCS0);
220 GEM_BUG_ON(IS_ERR(ce));
221 request = mock_request(ce, delay: 2 * HZ);
222 intel_context_put(ce);
223 if (IS_ERR(ptr: request)) {
224 err = PTR_ERR(ptr: request);
225 goto err_context_0;
226 }
227
228 i915_request_get(rq: request);
229 i915_request_add(rq: request);
230
231 ctx[1] = mock_context(i915, name: "B");
232 if (!ctx[1]) {
233 err = -ENOMEM;
234 goto err_ctx_1;
235 }
236
237 ce = i915_gem_context_get_engine(ctx: ctx[1], idx: RCS0);
238 GEM_BUG_ON(IS_ERR(ce));
239 vip = mock_request(ce, delay: 0);
240 intel_context_put(ce);
241 if (IS_ERR(ptr: vip)) {
242 err = PTR_ERR(ptr: vip);
243 goto err_context_1;
244 }
245
246 /* Simulate preemption by manual reordering */
247 if (!mock_cancel_request(request)) {
248 pr_err("failed to cancel request (already executed)!\n");
249 i915_request_add(rq: vip);
250 goto err_context_1;
251 }
252 i915_request_get(rq: vip);
253 i915_request_add(rq: vip);
254 rcu_read_lock();
255 request->engine->submit_request(request);
256 rcu_read_unlock();
257
258
259 if (i915_request_wait(rq: vip, flags: 0, HZ) == -ETIME) {
260 pr_err("timed out waiting for high priority request\n");
261 goto err;
262 }
263
264 if (i915_request_completed(rq: request)) {
265 pr_err("low priority request already completed\n");
266 goto err;
267 }
268
269 err = 0;
270err:
271 i915_request_put(rq: vip);
272err_context_1:
273 mock_context_close(ctx: ctx[1]);
274err_ctx_1:
275 i915_request_put(rq: request);
276err_context_0:
277 mock_context_close(ctx: ctx[0]);
278err_ctx_0:
279 mock_device_flush(i915);
280 return err;
281}
282
283struct smoketest {
284 struct intel_engine_cs *engine;
285 struct i915_gem_context **contexts;
286 atomic_long_t num_waits, num_fences;
287 int ncontexts, max_batch;
288 struct i915_request *(*request_alloc)(struct intel_context *ce);
289};
290
291static struct i915_request *
292__mock_request_alloc(struct intel_context *ce)
293{
294 return mock_request(ce, delay: 0);
295}
296
297static struct i915_request *
298__live_request_alloc(struct intel_context *ce)
299{
300 return intel_context_create_request(ce);
301}
302
303struct smoke_thread {
304 struct kthread_worker *worker;
305 struct kthread_work work;
306 struct smoketest *t;
307 bool stop;
308 int result;
309};
310
311static void __igt_breadcrumbs_smoketest(struct kthread_work *work)
312{
313 struct smoke_thread *thread = container_of(work, typeof(*thread), work);
314 struct smoketest *t = thread->t;
315 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
316 const unsigned int total = 4 * t->ncontexts + 1;
317 unsigned int num_waits = 0, num_fences = 0;
318 struct i915_request **requests;
319 I915_RND_STATE(prng);
320 unsigned int *order;
321 int err = 0;
322
323 /*
324 * A very simple test to catch the most egregious of list handling bugs.
325 *
326 * At its heart, we simply create oodles of requests running across
327 * multiple kthreads and enable signaling on them, for the sole purpose
328 * of stressing our breadcrumb handling. The only inspection we do is
329 * that the fences were marked as signaled.
330 */
331
332 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
333 if (!requests) {
334 thread->result = -ENOMEM;
335 return;
336 }
337
338 order = i915_random_order(count: total, state: &prng);
339 if (!order) {
340 err = -ENOMEM;
341 goto out_requests;
342 }
343
344 while (!READ_ONCE(thread->stop)) {
345 struct i915_sw_fence *submit, *wait;
346 unsigned int n, count;
347
348 submit = heap_fence_create(GFP_KERNEL);
349 if (!submit) {
350 err = -ENOMEM;
351 break;
352 }
353
354 wait = heap_fence_create(GFP_KERNEL);
355 if (!wait) {
356 i915_sw_fence_commit(fence: submit);
357 heap_fence_put(fence: submit);
358 err = -ENOMEM;
359 break;
360 }
361
362 i915_random_reorder(order, count: total, state: &prng);
363 count = 1 + i915_prandom_u32_max_state(ep_ro: max_batch, state: &prng);
364
365 for (n = 0; n < count; n++) {
366 struct i915_gem_context *ctx =
367 t->contexts[order[n] % t->ncontexts];
368 struct i915_request *rq;
369 struct intel_context *ce;
370
371 ce = i915_gem_context_get_engine(ctx, idx: t->engine->legacy_idx);
372 GEM_BUG_ON(IS_ERR(ce));
373 rq = t->request_alloc(ce);
374 intel_context_put(ce);
375 if (IS_ERR(ptr: rq)) {
376 err = PTR_ERR(ptr: rq);
377 count = n;
378 break;
379 }
380
381 err = i915_sw_fence_await_sw_fence_gfp(fence: &rq->submit,
382 after: submit,
383 GFP_KERNEL);
384
385 requests[n] = i915_request_get(rq);
386 i915_request_add(rq);
387
388 if (err >= 0)
389 err = i915_sw_fence_await_dma_fence(fence: wait,
390 dma: &rq->fence,
391 timeout: 0,
392 GFP_KERNEL);
393
394 if (err < 0) {
395 i915_request_put(rq);
396 count = n;
397 break;
398 }
399 }
400
401 i915_sw_fence_commit(fence: submit);
402 i915_sw_fence_commit(fence: wait);
403
404 if (!wait_event_timeout(wait->wait,
405 i915_sw_fence_done(wait),
406 5 * HZ)) {
407 struct i915_request *rq = requests[count - 1];
408
409 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
410 atomic_read(&wait->pending), count,
411 rq->fence.context, rq->fence.seqno,
412 t->engine->name);
413 GEM_TRACE_DUMP();
414
415 intel_gt_set_wedged(gt: t->engine->gt);
416 GEM_BUG_ON(!i915_request_completed(rq));
417 i915_sw_fence_wait(fence: wait);
418 err = -EIO;
419 }
420
421 for (n = 0; n < count; n++) {
422 struct i915_request *rq = requests[n];
423
424 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
425 &rq->fence.flags)) {
426 pr_err("%llu:%llu was not signaled!\n",
427 rq->fence.context, rq->fence.seqno);
428 err = -EINVAL;
429 }
430
431 i915_request_put(rq);
432 }
433
434 heap_fence_put(fence: wait);
435 heap_fence_put(fence: submit);
436
437 if (err < 0)
438 break;
439
440 num_fences += count;
441 num_waits++;
442
443 cond_resched();
444 }
445
446 atomic_long_add(i: num_fences, v: &t->num_fences);
447 atomic_long_add(i: num_waits, v: &t->num_waits);
448
449 kfree(objp: order);
450out_requests:
451 kfree(objp: requests);
452 thread->result = err;
453}
454
455static int mock_breadcrumbs_smoketest(void *arg)
456{
457 struct drm_i915_private *i915 = arg;
458 struct smoketest t = {
459 .engine = rcs0(i915),
460 .ncontexts = 1024,
461 .max_batch = 1024,
462 .request_alloc = __mock_request_alloc
463 };
464 unsigned int ncpus = num_online_cpus();
465 struct smoke_thread *threads;
466 unsigned int n;
467 int ret = 0;
468
469 /*
470 * Smoketest our breadcrumb/signal handling for requests across multiple
471 * threads. A very simple test to only catch the most egregious of bugs.
472 * See __igt_breadcrumbs_smoketest();
473 */
474
475 threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
476 if (!threads)
477 return -ENOMEM;
478
479 t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
480 if (!t.contexts) {
481 ret = -ENOMEM;
482 goto out_threads;
483 }
484
485 for (n = 0; n < t.ncontexts; n++) {
486 t.contexts[n] = mock_context(i915: t.engine->i915, name: "mock");
487 if (!t.contexts[n]) {
488 ret = -ENOMEM;
489 goto out_contexts;
490 }
491 }
492
493 for (n = 0; n < ncpus; n++) {
494 struct kthread_worker *worker;
495
496 worker = kthread_run_worker(0, "igt/%d", n);
497 if (IS_ERR(ptr: worker)) {
498 ret = PTR_ERR(ptr: worker);
499 ncpus = n;
500 break;
501 }
502
503 threads[n].worker = worker;
504 threads[n].t = &t;
505 threads[n].stop = false;
506 threads[n].result = 0;
507
508 kthread_init_work(&threads[n].work,
509 __igt_breadcrumbs_smoketest);
510 kthread_queue_work(worker, work: &threads[n].work);
511 }
512
513 msleep(msecs: jiffies_to_msecs(j: i915_selftest.timeout_jiffies));
514
515 for (n = 0; n < ncpus; n++) {
516 int err;
517
518 WRITE_ONCE(threads[n].stop, true);
519 kthread_flush_work(work: &threads[n].work);
520 err = READ_ONCE(threads[n].result);
521 if (err < 0 && !ret)
522 ret = err;
523
524 kthread_destroy_worker(worker: threads[n].worker);
525 }
526 pr_info("Completed %lu waits for %lu fence across %d cpus\n",
527 atomic_long_read(&t.num_waits),
528 atomic_long_read(&t.num_fences),
529 ncpus);
530
531out_contexts:
532 for (n = 0; n < t.ncontexts; n++) {
533 if (!t.contexts[n])
534 break;
535 mock_context_close(ctx: t.contexts[n]);
536 }
537 kfree(objp: t.contexts);
538out_threads:
539 kfree(objp: threads);
540 return ret;
541}
542
543int i915_request_mock_selftests(void)
544{
545 static const struct i915_subtest tests[] = {
546 SUBTEST(igt_add_request),
547 SUBTEST(igt_wait_request),
548 SUBTEST(igt_fence_wait),
549 SUBTEST(igt_request_rewind),
550 SUBTEST(mock_breadcrumbs_smoketest),
551 };
552 struct drm_i915_private *i915;
553 intel_wakeref_t wakeref;
554 int err = 0;
555
556 i915 = mock_gem_device();
557 if (!i915)
558 return -ENOMEM;
559
560 with_intel_runtime_pm(&i915->runtime_pm, wakeref)
561 err = i915_subtests(tests, i915);
562
563 mock_destroy_device(i915);
564
565 return err;
566}
567
568static int live_nop_request(void *arg)
569{
570 struct drm_i915_private *i915 = arg;
571 struct intel_engine_cs *engine;
572 struct igt_live_test t;
573 int err = -ENODEV;
574
575 /*
576 * Submit various sized batches of empty requests, to each engine
577 * (individually), and wait for the batch to complete. We can check
578 * the overhead of submitting requests to the hardware.
579 */
580
581 for_each_uabi_engine(engine, i915) {
582 unsigned long n, prime;
583 IGT_TIMEOUT(end_time);
584 ktime_t times[2] = {};
585
586 err = igt_live_test_begin(t: &t, i915, func: __func__, name: engine->name);
587 if (err)
588 return err;
589
590 intel_engine_pm_get(engine);
591 for_each_prime_number_from(prime, 1, 8192) {
592 struct i915_request *request = NULL;
593
594 times[1] = ktime_get_raw();
595
596 for (n = 0; n < prime; n++) {
597 i915_request_put(rq: request);
598 request = i915_request_create(ce: engine->kernel_context);
599 if (IS_ERR(ptr: request))
600 return PTR_ERR(ptr: request);
601
602 /*
603 * This space is left intentionally blank.
604 *
605 * We do not actually want to perform any
606 * action with this request, we just want
607 * to measure the latency in allocation
608 * and submission of our breadcrumbs -
609 * ensuring that the bare request is sufficient
610 * for the system to work (i.e. proper HEAD
611 * tracking of the rings, interrupt handling,
612 * etc). It also gives us the lowest bounds
613 * for latency.
614 */
615
616 i915_request_get(rq: request);
617 i915_request_add(rq: request);
618 }
619 i915_request_wait(rq: request, flags: 0, MAX_SCHEDULE_TIMEOUT);
620 i915_request_put(rq: request);
621
622 times[1] = ktime_sub(ktime_get_raw(), times[1]);
623 if (prime == 1)
624 times[0] = times[1];
625
626 if (__igt_timeout(timeout: end_time, NULL))
627 break;
628 }
629 intel_engine_pm_put(engine);
630
631 err = igt_live_test_end(t: &t);
632 if (err)
633 return err;
634
635 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
636 engine->name,
637 ktime_to_ns(times[0]),
638 prime, div64_u64(ktime_to_ns(times[1]), prime));
639 }
640
641 return err;
642}
643
644static int __cancel_inactive(struct intel_engine_cs *engine)
645{
646 struct intel_context *ce;
647 struct igt_spinner spin;
648 struct i915_request *rq;
649 int err = 0;
650
651 if (igt_spinner_init(spin: &spin, gt: engine->gt))
652 return -ENOMEM;
653
654 ce = intel_context_create(engine);
655 if (IS_ERR(ptr: ce)) {
656 err = PTR_ERR(ptr: ce);
657 goto out_spin;
658 }
659
660 rq = igt_spinner_create_request(spin: &spin, ce, MI_ARB_CHECK);
661 if (IS_ERR(ptr: rq)) {
662 err = PTR_ERR(ptr: rq);
663 goto out_ce;
664 }
665
666 pr_debug("%s: Cancelling inactive request\n", engine->name);
667 i915_request_cancel(rq, error: -EINTR);
668 i915_request_get(rq);
669 i915_request_add(rq);
670
671 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
672 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
673
674 pr_err("%s: Failed to cancel inactive request\n", engine->name);
675 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
676 err = -ETIME;
677 goto out_rq;
678 }
679
680 if (rq->fence.error != -EINTR) {
681 pr_err("%s: fence not cancelled (%u)\n",
682 engine->name, rq->fence.error);
683 err = -EINVAL;
684 }
685
686out_rq:
687 i915_request_put(rq);
688out_ce:
689 intel_context_put(ce);
690out_spin:
691 igt_spinner_fini(spin: &spin);
692 if (err)
693 pr_err("%s: %s error %d\n", __func__, engine->name, err);
694 return err;
695}
696
697static int __cancel_active(struct intel_engine_cs *engine)
698{
699 struct intel_context *ce;
700 struct igt_spinner spin;
701 struct i915_request *rq;
702 int err = 0;
703
704 if (igt_spinner_init(spin: &spin, gt: engine->gt))
705 return -ENOMEM;
706
707 ce = intel_context_create(engine);
708 if (IS_ERR(ptr: ce)) {
709 err = PTR_ERR(ptr: ce);
710 goto out_spin;
711 }
712
713 rq = igt_spinner_create_request(spin: &spin, ce, MI_ARB_CHECK);
714 if (IS_ERR(ptr: rq)) {
715 err = PTR_ERR(ptr: rq);
716 goto out_ce;
717 }
718
719 pr_debug("%s: Cancelling active request\n", engine->name);
720 i915_request_get(rq);
721 i915_request_add(rq);
722 if (!igt_wait_for_spinner(spin: &spin, rq)) {
723 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
724
725 pr_err("Failed to start spinner on %s\n", engine->name);
726 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
727 err = -ETIME;
728 goto out_rq;
729 }
730 i915_request_cancel(rq, error: -EINTR);
731
732 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
733 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
734
735 pr_err("%s: Failed to cancel active request\n", engine->name);
736 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
737 err = -ETIME;
738 goto out_rq;
739 }
740
741 if (rq->fence.error != -EINTR) {
742 pr_err("%s: fence not cancelled (%u)\n",
743 engine->name, rq->fence.error);
744 err = -EINVAL;
745 }
746
747out_rq:
748 i915_request_put(rq);
749out_ce:
750 intel_context_put(ce);
751out_spin:
752 igt_spinner_fini(spin: &spin);
753 if (err)
754 pr_err("%s: %s error %d\n", __func__, engine->name, err);
755 return err;
756}
757
758static int __cancel_completed(struct intel_engine_cs *engine)
759{
760 struct intel_context *ce;
761 struct igt_spinner spin;
762 struct i915_request *rq;
763 int err = 0;
764
765 if (igt_spinner_init(spin: &spin, gt: engine->gt))
766 return -ENOMEM;
767
768 ce = intel_context_create(engine);
769 if (IS_ERR(ptr: ce)) {
770 err = PTR_ERR(ptr: ce);
771 goto out_spin;
772 }
773
774 rq = igt_spinner_create_request(spin: &spin, ce, MI_ARB_CHECK);
775 if (IS_ERR(ptr: rq)) {
776 err = PTR_ERR(ptr: rq);
777 goto out_ce;
778 }
779 igt_spinner_end(spin: &spin);
780 i915_request_get(rq);
781 i915_request_add(rq);
782
783 if (i915_request_wait(rq, flags: 0, HZ / 5) < 0) {
784 err = -ETIME;
785 goto out_rq;
786 }
787
788 pr_debug("%s: Cancelling completed request\n", engine->name);
789 i915_request_cancel(rq, error: -EINTR);
790 if (rq->fence.error) {
791 pr_err("%s: fence not cancelled (%u)\n",
792 engine->name, rq->fence.error);
793 err = -EINVAL;
794 }
795
796out_rq:
797 i915_request_put(rq);
798out_ce:
799 intel_context_put(ce);
800out_spin:
801 igt_spinner_fini(spin: &spin);
802 if (err)
803 pr_err("%s: %s error %d\n", __func__, engine->name, err);
804 return err;
805}
806
807/*
808 * Test to prove a non-preemptable request can be cancelled and a subsequent
809 * request on the same context can successfully complete after cancellation.
810 *
811 * Testing methodology is to create a non-preemptible request and submit it,
812 * wait for spinner to start, create a NOP request and submit it, cancel the
813 * spinner, wait for spinner to complete and verify it failed with an error,
814 * finally wait for NOP request to complete verify it succeeded without an
815 * error. Preemption timeout also reduced / restored so test runs in a timely
816 * maner.
817 */
818static int __cancel_reset(struct drm_i915_private *i915,
819 struct intel_engine_cs *engine)
820{
821 struct intel_context *ce;
822 struct igt_spinner spin;
823 struct i915_request *rq, *nop;
824 unsigned long preempt_timeout_ms;
825 int err = 0;
826
827 if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT ||
828 !intel_has_reset_engine(gt: engine->gt))
829 return 0;
830
831 preempt_timeout_ms = engine->props.preempt_timeout_ms;
832 engine->props.preempt_timeout_ms = 100;
833
834 if (igt_spinner_init(spin: &spin, gt: engine->gt))
835 goto out_restore;
836
837 ce = intel_context_create(engine);
838 if (IS_ERR(ptr: ce)) {
839 err = PTR_ERR(ptr: ce);
840 goto out_spin;
841 }
842
843 rq = igt_spinner_create_request(spin: &spin, ce, MI_NOOP);
844 if (IS_ERR(ptr: rq)) {
845 err = PTR_ERR(ptr: rq);
846 goto out_ce;
847 }
848
849 pr_debug("%s: Cancelling active non-preemptable request\n",
850 engine->name);
851 i915_request_get(rq);
852 i915_request_add(rq);
853 if (!igt_wait_for_spinner(spin: &spin, rq)) {
854 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
855
856 pr_err("Failed to start spinner on %s\n", engine->name);
857 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
858 err = -ETIME;
859 goto out_rq;
860 }
861
862 nop = intel_context_create_request(ce);
863 if (IS_ERR(ptr: nop))
864 goto out_rq;
865 i915_request_get(rq: nop);
866 i915_request_add(rq: nop);
867
868 i915_request_cancel(rq, error: -EINTR);
869
870 if (i915_request_wait(rq, flags: 0, HZ) < 0) {
871 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
872
873 pr_err("%s: Failed to cancel hung request\n", engine->name);
874 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
875 err = -ETIME;
876 goto out_nop;
877 }
878
879 if (rq->fence.error != -EINTR) {
880 pr_err("%s: fence not cancelled (%u)\n",
881 engine->name, rq->fence.error);
882 err = -EINVAL;
883 goto out_nop;
884 }
885
886 if (i915_request_wait(rq: nop, flags: 0, HZ) < 0) {
887 struct drm_printer p = drm_info_printer(dev: engine->i915->drm.dev);
888
889 pr_err("%s: Failed to complete nop request\n", engine->name);
890 intel_engine_dump(engine, m: &p, header: "%s\n", engine->name);
891 err = -ETIME;
892 goto out_nop;
893 }
894
895 if (nop->fence.error != 0) {
896 pr_err("%s: Nop request errored (%u)\n",
897 engine->name, nop->fence.error);
898 err = -EINVAL;
899 }
900
901out_nop:
902 i915_request_put(rq: nop);
903out_rq:
904 i915_request_put(rq);
905out_ce:
906 intel_context_put(ce);
907out_spin:
908 igt_spinner_fini(spin: &spin);
909out_restore:
910 engine->props.preempt_timeout_ms = preempt_timeout_ms;
911 if (err)
912 pr_err("%s: %s error %d\n", __func__, engine->name, err);
913 return err;
914}
915
916static int live_cancel_request(void *arg)
917{
918 struct drm_i915_private *i915 = arg;
919 struct intel_engine_cs *engine;
920
921 /*
922 * Check cancellation of requests. We expect to be able to immediately
923 * cancel active requests, even if they are currently on the GPU.
924 */
925
926 for_each_uabi_engine(engine, i915) {
927 struct igt_live_test t;
928 int err, err2;
929
930 if (!intel_engine_has_preemption(engine))
931 continue;
932
933 err = igt_live_test_begin(t: &t, i915, func: __func__, name: engine->name);
934 if (err)
935 return err;
936
937 err = __cancel_inactive(engine);
938 if (err == 0)
939 err = __cancel_active(engine);
940 if (err == 0)
941 err = __cancel_completed(engine);
942
943 err2 = igt_live_test_end(t: &t);
944 if (err)
945 return err;
946 if (err2)
947 return err2;
948
949 /* Expects reset so call outside of igt_live_test_* */
950 err = __cancel_reset(i915, engine);
951 if (err)
952 return err;
953
954 if (igt_flush_test(i915))
955 return -EIO;
956 }
957
958 return 0;
959}
960
961static struct i915_vma *empty_batch(struct intel_gt *gt)
962{
963 struct drm_i915_gem_object *obj;
964 struct i915_vma *vma;
965 u32 *cmd;
966 int err;
967
968 obj = i915_gem_object_create_internal(i915: gt->i915, PAGE_SIZE);
969 if (IS_ERR(ptr: obj))
970 return ERR_CAST(ptr: obj);
971
972 cmd = i915_gem_object_pin_map_unlocked(obj, type: I915_MAP_WC);
973 if (IS_ERR(ptr: cmd)) {
974 err = PTR_ERR(ptr: cmd);
975 goto err;
976 }
977
978 *cmd = MI_BATCH_BUFFER_END;
979
980 __i915_gem_object_flush_map(obj, offset: 0, size: 64);
981 i915_gem_object_unpin_map(obj);
982
983 intel_gt_chipset_flush(gt);
984
985 vma = i915_vma_instance(obj, vm: gt->vm, NULL);
986 if (IS_ERR(ptr: vma)) {
987 err = PTR_ERR(ptr: vma);
988 goto err;
989 }
990
991 err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER);
992 if (err)
993 goto err;
994
995 /* Force the wait now to avoid including it in the benchmark */
996 err = i915_vma_sync(vma);
997 if (err)
998 goto err_pin;
999
1000 return vma;
1001
1002err_pin:
1003 i915_vma_unpin(vma);
1004err:
1005 i915_gem_object_put(obj);
1006 return ERR_PTR(error: err);
1007}
1008
1009static int emit_bb_start(struct i915_request *rq, struct i915_vma *batch)
1010{
1011 return rq->engine->emit_bb_start(rq,
1012 i915_vma_offset(vma: batch),
1013 i915_vma_size(vma: batch),
1014 0);
1015}
1016
1017static struct i915_request *
1018empty_request(struct intel_engine_cs *engine,
1019 struct i915_vma *batch)
1020{
1021 struct i915_request *request;
1022 int err;
1023
1024 request = i915_request_create(ce: engine->kernel_context);
1025 if (IS_ERR(ptr: request))
1026 return request;
1027
1028 err = emit_bb_start(rq: request, batch);
1029 if (err)
1030 goto out_request;
1031
1032 i915_request_get(rq: request);
1033out_request:
1034 i915_request_add(rq: request);
1035 return err ? ERR_PTR(error: err) : request;
1036}
1037
1038static int live_empty_request(void *arg)
1039{
1040 struct drm_i915_private *i915 = arg;
1041 struct intel_engine_cs *engine;
1042 struct igt_live_test t;
1043 int err;
1044
1045 /*
1046 * Submit various sized batches of empty requests, to each engine
1047 * (individually), and wait for the batch to complete. We can check
1048 * the overhead of submitting requests to the hardware.
1049 */
1050
1051 for_each_uabi_engine(engine, i915) {
1052 IGT_TIMEOUT(end_time);
1053 struct i915_request *request;
1054 struct i915_vma *batch;
1055 unsigned long n, prime;
1056 ktime_t times[2] = {};
1057
1058 batch = empty_batch(gt: engine->gt);
1059 if (IS_ERR(ptr: batch))
1060 return PTR_ERR(ptr: batch);
1061
1062 err = igt_live_test_begin(t: &t, i915, func: __func__, name: engine->name);
1063 if (err)
1064 goto out_batch;
1065
1066 intel_engine_pm_get(engine);
1067
1068 /* Warmup / preload */
1069 request = empty_request(engine, batch);
1070 if (IS_ERR(ptr: request)) {
1071 err = PTR_ERR(ptr: request);
1072 intel_engine_pm_put(engine);
1073 goto out_batch;
1074 }
1075 i915_request_wait(rq: request, flags: 0, MAX_SCHEDULE_TIMEOUT);
1076
1077 for_each_prime_number_from(prime, 1, 8192) {
1078 times[1] = ktime_get_raw();
1079
1080 for (n = 0; n < prime; n++) {
1081 i915_request_put(rq: request);
1082 request = empty_request(engine, batch);
1083 if (IS_ERR(ptr: request)) {
1084 err = PTR_ERR(ptr: request);
1085 intel_engine_pm_put(engine);
1086 goto out_batch;
1087 }
1088 }
1089 i915_request_wait(rq: request, flags: 0, MAX_SCHEDULE_TIMEOUT);
1090
1091 times[1] = ktime_sub(ktime_get_raw(), times[1]);
1092 if (prime == 1)
1093 times[0] = times[1];
1094
1095 if (__igt_timeout(timeout: end_time, NULL))
1096 break;
1097 }
1098 i915_request_put(rq: request);
1099 intel_engine_pm_put(engine);
1100
1101 err = igt_live_test_end(t: &t);
1102 if (err)
1103 goto out_batch;
1104
1105 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
1106 engine->name,
1107 ktime_to_ns(times[0]),
1108 prime, div64_u64(ktime_to_ns(times[1]), prime));
1109out_batch:
1110 i915_vma_unpin(vma: batch);
1111 i915_vma_put(vma: batch);
1112 if (err)
1113 break;
1114 }
1115
1116 return err;
1117}
1118
1119static struct i915_vma *recursive_batch(struct intel_gt *gt)
1120{
1121 struct drm_i915_gem_object *obj;
1122 const int ver = GRAPHICS_VER(gt->i915);
1123 struct i915_vma *vma;
1124 u32 *cmd;
1125 int err;
1126
1127 obj = i915_gem_object_create_internal(i915: gt->i915, PAGE_SIZE);
1128 if (IS_ERR(ptr: obj))
1129 return ERR_CAST(ptr: obj);
1130
1131 vma = i915_vma_instance(obj, vm: gt->vm, NULL);
1132 if (IS_ERR(ptr: vma)) {
1133 err = PTR_ERR(ptr: vma);
1134 goto err;
1135 }
1136
1137 err = i915_vma_pin(vma, size: 0, alignment: 0, PIN_USER);
1138 if (err)
1139 goto err;
1140
1141 cmd = i915_gem_object_pin_map_unlocked(obj, type: I915_MAP_WC);
1142 if (IS_ERR(ptr: cmd)) {
1143 err = PTR_ERR(ptr: cmd);
1144 goto err;
1145 }
1146
1147 if (ver >= 8) {
1148 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
1149 *cmd++ = lower_32_bits(i915_vma_offset(vma));
1150 *cmd++ = upper_32_bits(i915_vma_offset(vma));
1151 } else if (ver >= 6) {
1152 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
1153 *cmd++ = lower_32_bits(i915_vma_offset(vma));
1154 } else {
1155 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
1156 *cmd++ = lower_32_bits(i915_vma_offset(vma));
1157 }
1158 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
1159
1160 __i915_gem_object_flush_map(obj, offset: 0, size: 64);
1161 i915_gem_object_unpin_map(obj);
1162
1163 intel_gt_chipset_flush(gt);
1164
1165 return vma;
1166
1167err:
1168 i915_gem_object_put(obj);
1169 return ERR_PTR(error: err);
1170}
1171
1172static int recursive_batch_resolve(struct i915_vma *batch)
1173{
1174 u32 *cmd;
1175
1176 cmd = i915_gem_object_pin_map_unlocked(obj: batch->obj, type: I915_MAP_WC);
1177 if (IS_ERR(ptr: cmd))
1178 return PTR_ERR(ptr: cmd);
1179
1180 *cmd = MI_BATCH_BUFFER_END;
1181
1182 __i915_gem_object_flush_map(obj: batch->obj, offset: 0, size: sizeof(*cmd));
1183 i915_gem_object_unpin_map(obj: batch->obj);
1184
1185 intel_gt_chipset_flush(gt: batch->vm->gt);
1186
1187 return 0;
1188}
1189
1190static int live_all_engines(void *arg)
1191{
1192 struct drm_i915_private *i915 = arg;
1193 const unsigned int nengines = num_uabi_engines(i915);
1194 struct intel_engine_cs *engine;
1195 struct i915_request **request;
1196 struct igt_live_test t;
1197 unsigned int idx;
1198 int err;
1199
1200 /*
1201 * Check we can submit requests to all engines simultaneously. We
1202 * send a recursive batch to each engine - checking that we don't
1203 * block doing so, and that they don't complete too soon.
1204 */
1205
1206 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1207 if (!request)
1208 return -ENOMEM;
1209
1210 err = igt_live_test_begin(t: &t, i915, func: __func__, name: "");
1211 if (err)
1212 goto out_free;
1213
1214 idx = 0;
1215 for_each_uabi_engine(engine, i915) {
1216 struct i915_vma *batch;
1217
1218 batch = recursive_batch(gt: engine->gt);
1219 if (IS_ERR(ptr: batch)) {
1220 err = PTR_ERR(ptr: batch);
1221 pr_err("%s: Unable to create batch, err=%d\n",
1222 __func__, err);
1223 goto out_free;
1224 }
1225
1226 i915_vma_lock(vma: batch);
1227 request[idx] = intel_engine_create_kernel_request(engine);
1228 if (IS_ERR(ptr: request[idx])) {
1229 err = PTR_ERR(ptr: request[idx]);
1230 pr_err("%s: Request allocation failed with err=%d\n",
1231 __func__, err);
1232 goto out_unlock;
1233 }
1234 GEM_BUG_ON(request[idx]->context->vm != batch->vm);
1235
1236 err = i915_vma_move_to_active(vma: batch, rq: request[idx], flags: 0);
1237 GEM_BUG_ON(err);
1238
1239 err = emit_bb_start(rq: request[idx], batch);
1240 GEM_BUG_ON(err);
1241 request[idx]->batch = batch;
1242
1243 i915_request_get(rq: request[idx]);
1244 i915_request_add(rq: request[idx]);
1245 idx++;
1246out_unlock:
1247 i915_vma_unlock(vma: batch);
1248 if (err)
1249 goto out_request;
1250 }
1251
1252 idx = 0;
1253 for_each_uabi_engine(engine, i915) {
1254 if (i915_request_completed(rq: request[idx])) {
1255 pr_err("%s(%s): request completed too early!\n",
1256 __func__, engine->name);
1257 err = -EINVAL;
1258 goto out_request;
1259 }
1260 idx++;
1261 }
1262
1263 idx = 0;
1264 for_each_uabi_engine(engine, i915) {
1265 err = recursive_batch_resolve(batch: request[idx]->batch);
1266 if (err) {
1267 pr_err("%s: failed to resolve batch, err=%d\n",
1268 __func__, err);
1269 goto out_request;
1270 }
1271 idx++;
1272 }
1273
1274 idx = 0;
1275 for_each_uabi_engine(engine, i915) {
1276 struct i915_request *rq = request[idx];
1277 long timeout;
1278
1279 timeout = i915_request_wait(rq, flags: 0,
1280 MAX_SCHEDULE_TIMEOUT);
1281 if (timeout < 0) {
1282 err = timeout;
1283 pr_err("%s: error waiting for request on %s, err=%d\n",
1284 __func__, engine->name, err);
1285 goto out_request;
1286 }
1287
1288 GEM_BUG_ON(!i915_request_completed(rq));
1289 i915_vma_unpin(vma: rq->batch);
1290 i915_vma_put(vma: rq->batch);
1291 i915_request_put(rq);
1292 request[idx] = NULL;
1293 idx++;
1294 }
1295
1296 err = igt_live_test_end(t: &t);
1297
1298out_request:
1299 idx = 0;
1300 for_each_uabi_engine(engine, i915) {
1301 struct i915_request *rq = request[idx];
1302
1303 if (!rq)
1304 continue;
1305
1306 if (rq->batch) {
1307 i915_vma_unpin(vma: rq->batch);
1308 i915_vma_put(vma: rq->batch);
1309 }
1310 i915_request_put(rq);
1311 idx++;
1312 }
1313out_free:
1314 kfree(objp: request);
1315 return err;
1316}
1317
1318static int live_sequential_engines(void *arg)
1319{
1320 struct drm_i915_private *i915 = arg;
1321 const unsigned int nengines = num_uabi_engines(i915);
1322 struct i915_request **request;
1323 struct i915_request *prev = NULL;
1324 struct intel_engine_cs *engine;
1325 struct igt_live_test t;
1326 unsigned int idx;
1327 int err;
1328
1329 /*
1330 * Check we can submit requests to all engines sequentially, such
1331 * that each successive request waits for the earlier ones. This
1332 * tests that we don't execute requests out of order, even though
1333 * they are running on independent engines.
1334 */
1335
1336 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
1337 if (!request)
1338 return -ENOMEM;
1339
1340 err = igt_live_test_begin(t: &t, i915, func: __func__, name: "");
1341 if (err)
1342 goto out_free;
1343
1344 idx = 0;
1345 for_each_uabi_engine(engine, i915) {
1346 struct i915_vma *batch;
1347
1348 batch = recursive_batch(gt: engine->gt);
1349 if (IS_ERR(ptr: batch)) {
1350 err = PTR_ERR(ptr: batch);
1351 pr_err("%s: Unable to create batch for %s, err=%d\n",
1352 __func__, engine->name, err);
1353 goto out_free;
1354 }
1355
1356 i915_vma_lock(vma: batch);
1357 request[idx] = intel_engine_create_kernel_request(engine);
1358 if (IS_ERR(ptr: request[idx])) {
1359 err = PTR_ERR(ptr: request[idx]);
1360 pr_err("%s: Request allocation failed for %s with err=%d\n",
1361 __func__, engine->name, err);
1362 goto out_unlock;
1363 }
1364 GEM_BUG_ON(request[idx]->context->vm != batch->vm);
1365
1366 if (prev) {
1367 err = i915_request_await_dma_fence(rq: request[idx],
1368 fence: &prev->fence);
1369 if (err) {
1370 i915_request_add(rq: request[idx]);
1371 pr_err("%s: Request await failed for %s with err=%d\n",
1372 __func__, engine->name, err);
1373 goto out_unlock;
1374 }
1375 }
1376
1377 err = i915_vma_move_to_active(vma: batch, rq: request[idx], flags: 0);
1378 GEM_BUG_ON(err);
1379
1380 err = emit_bb_start(rq: request[idx], batch);
1381 GEM_BUG_ON(err);
1382 request[idx]->batch = batch;
1383
1384 i915_request_get(rq: request[idx]);
1385 i915_request_add(rq: request[idx]);
1386
1387 prev = request[idx];
1388 idx++;
1389
1390out_unlock:
1391 i915_vma_unlock(vma: batch);
1392 if (err)
1393 goto out_request;
1394 }
1395
1396 idx = 0;
1397 for_each_uabi_engine(engine, i915) {
1398 long timeout;
1399
1400 if (i915_request_completed(rq: request[idx])) {
1401 pr_err("%s(%s): request completed too early!\n",
1402 __func__, engine->name);
1403 err = -EINVAL;
1404 goto out_request;
1405 }
1406
1407 err = recursive_batch_resolve(batch: request[idx]->batch);
1408 if (err) {
1409 pr_err("%s: failed to resolve batch, err=%d\n",
1410 __func__, err);
1411 goto out_request;
1412 }
1413
1414 timeout = i915_request_wait(rq: request[idx], flags: 0,
1415 MAX_SCHEDULE_TIMEOUT);
1416 if (timeout < 0) {
1417 err = timeout;
1418 pr_err("%s: error waiting for request on %s, err=%d\n",
1419 __func__, engine->name, err);
1420 goto out_request;
1421 }
1422
1423 GEM_BUG_ON(!i915_request_completed(request[idx]));
1424 idx++;
1425 }
1426
1427 err = igt_live_test_end(t: &t);
1428
1429out_request:
1430 idx = 0;
1431 for_each_uabi_engine(engine, i915) {
1432 u32 *cmd;
1433
1434 if (!request[idx])
1435 break;
1436
1437 cmd = i915_gem_object_pin_map_unlocked(obj: request[idx]->batch->obj,
1438 type: I915_MAP_WC);
1439 if (!IS_ERR(ptr: cmd)) {
1440 *cmd = MI_BATCH_BUFFER_END;
1441
1442 __i915_gem_object_flush_map(obj: request[idx]->batch->obj,
1443 offset: 0, size: sizeof(*cmd));
1444 i915_gem_object_unpin_map(obj: request[idx]->batch->obj);
1445
1446 intel_gt_chipset_flush(gt: engine->gt);
1447 }
1448
1449 i915_vma_put(vma: request[idx]->batch);
1450 i915_request_put(rq: request[idx]);
1451 idx++;
1452 }
1453out_free:
1454 kfree(objp: request);
1455 return err;
1456}
1457
1458struct parallel_thread {
1459 struct kthread_worker *worker;
1460 struct kthread_work work;
1461 struct intel_engine_cs *engine;
1462 int result;
1463};
1464
1465static void __live_parallel_engine1(struct kthread_work *work)
1466{
1467 struct parallel_thread *thread =
1468 container_of(work, typeof(*thread), work);
1469 struct intel_engine_cs *engine = thread->engine;
1470 IGT_TIMEOUT(end_time);
1471 unsigned long count;
1472 int err = 0;
1473
1474 count = 0;
1475 intel_engine_pm_get(engine);
1476 do {
1477 struct i915_request *rq;
1478
1479 rq = i915_request_create(ce: engine->kernel_context);
1480 if (IS_ERR(ptr: rq)) {
1481 err = PTR_ERR(ptr: rq);
1482 break;
1483 }
1484
1485 i915_request_get(rq);
1486 i915_request_add(rq);
1487
1488 err = 0;
1489 if (i915_request_wait(rq, flags: 0, HZ) < 0)
1490 err = -ETIME;
1491 i915_request_put(rq);
1492 if (err)
1493 break;
1494
1495 count++;
1496 } while (!__igt_timeout(timeout: end_time, NULL));
1497 intel_engine_pm_put(engine);
1498
1499 pr_info("%s: %lu request + sync\n", engine->name, count);
1500 thread->result = err;
1501}
1502
1503static void __live_parallel_engineN(struct kthread_work *work)
1504{
1505 struct parallel_thread *thread =
1506 container_of(work, typeof(*thread), work);
1507 struct intel_engine_cs *engine = thread->engine;
1508 IGT_TIMEOUT(end_time);
1509 unsigned long count;
1510 int err = 0;
1511
1512 count = 0;
1513 intel_engine_pm_get(engine);
1514 do {
1515 struct i915_request *rq;
1516
1517 rq = i915_request_create(ce: engine->kernel_context);
1518 if (IS_ERR(ptr: rq)) {
1519 err = PTR_ERR(ptr: rq);
1520 break;
1521 }
1522
1523 i915_request_add(rq);
1524 count++;
1525 } while (!__igt_timeout(timeout: end_time, NULL));
1526 intel_engine_pm_put(engine);
1527
1528 pr_info("%s: %lu requests\n", engine->name, count);
1529 thread->result = err;
1530}
1531
1532static bool wake_all(struct drm_i915_private *i915)
1533{
1534 if (atomic_dec_and_test(v: &i915->selftest.counter)) {
1535 wake_up_var(var: &i915->selftest.counter);
1536 return true;
1537 }
1538
1539 return false;
1540}
1541
1542static int wait_for_all(struct drm_i915_private *i915)
1543{
1544 if (wake_all(i915))
1545 return 0;
1546
1547 if (wait_var_event_timeout(&i915->selftest.counter,
1548 !atomic_read(&i915->selftest.counter),
1549 i915_selftest.timeout_jiffies))
1550 return 0;
1551
1552 return -ETIME;
1553}
1554
1555static void __live_parallel_spin(struct kthread_work *work)
1556{
1557 struct parallel_thread *thread =
1558 container_of(work, typeof(*thread), work);
1559 struct intel_engine_cs *engine = thread->engine;
1560 struct igt_spinner spin;
1561 struct i915_request *rq;
1562 int err = 0;
1563
1564 /*
1565 * Create a spinner running for eternity on each engine. If a second
1566 * spinner is incorrectly placed on the same engine, it will not be
1567 * able to start in time.
1568 */
1569
1570 if (igt_spinner_init(spin: &spin, gt: engine->gt)) {
1571 wake_all(i915: engine->i915);
1572 thread->result = -ENOMEM;
1573 return;
1574 }
1575
1576 intel_engine_pm_get(engine);
1577 rq = igt_spinner_create_request(spin: &spin,
1578 ce: engine->kernel_context,
1579 MI_NOOP); /* no preemption */
1580 intel_engine_pm_put(engine);
1581 if (IS_ERR(ptr: rq)) {
1582 err = PTR_ERR(ptr: rq);
1583 if (err == -ENODEV)
1584 err = 0;
1585 wake_all(i915: engine->i915);
1586 goto out_spin;
1587 }
1588
1589 i915_request_get(rq);
1590 i915_request_add(rq);
1591 if (igt_wait_for_spinner(spin: &spin, rq)) {
1592 /* Occupy this engine for the whole test */
1593 err = wait_for_all(i915: engine->i915);
1594 } else {
1595 pr_err("Failed to start spinner on %s\n", engine->name);
1596 err = -EINVAL;
1597 }
1598 igt_spinner_end(spin: &spin);
1599
1600 if (err == 0 && i915_request_wait(rq, flags: 0, HZ) < 0)
1601 err = -EIO;
1602 i915_request_put(rq);
1603
1604out_spin:
1605 igt_spinner_fini(spin: &spin);
1606 thread->result = err;
1607}
1608
1609static int live_parallel_engines(void *arg)
1610{
1611 struct drm_i915_private *i915 = arg;
1612 static void (* const func[])(struct kthread_work *) = {
1613 __live_parallel_engine1,
1614 __live_parallel_engineN,
1615 __live_parallel_spin,
1616 NULL,
1617 };
1618 const unsigned int nengines = num_uabi_engines(i915);
1619 struct parallel_thread *threads;
1620 struct intel_engine_cs *engine;
1621 void (* const *fn)(struct kthread_work *);
1622 int err = 0;
1623
1624 /*
1625 * Check we can submit requests to all engines concurrently. This
1626 * tests that we load up the system maximally.
1627 */
1628
1629 threads = kcalloc(nengines, sizeof(*threads), GFP_KERNEL);
1630 if (!threads)
1631 return -ENOMEM;
1632
1633 for (fn = func; !err && *fn; fn++) {
1634 char name[KSYM_NAME_LEN];
1635 struct igt_live_test t;
1636 unsigned int idx;
1637
1638 snprintf(buf: name, size: sizeof(name), fmt: "%ps", *fn);
1639 err = igt_live_test_begin(t: &t, i915, func: __func__, name);
1640 if (err)
1641 break;
1642
1643 atomic_set(v: &i915->selftest.counter, i: nengines);
1644
1645 idx = 0;
1646 for_each_uabi_engine(engine, i915) {
1647 struct kthread_worker *worker;
1648
1649 worker = kthread_run_worker(0, "igt/parallel:%s",
1650 engine->name);
1651 if (IS_ERR(ptr: worker)) {
1652 err = PTR_ERR(ptr: worker);
1653 break;
1654 }
1655
1656 threads[idx].worker = worker;
1657 threads[idx].result = 0;
1658 threads[idx].engine = engine;
1659
1660 kthread_init_work(&threads[idx].work, *fn);
1661 kthread_queue_work(worker, work: &threads[idx].work);
1662 idx++;
1663 }
1664
1665 idx = 0;
1666 for_each_uabi_engine(engine, i915) {
1667 int status;
1668
1669 if (!threads[idx].worker)
1670 break;
1671
1672 kthread_flush_work(work: &threads[idx].work);
1673 status = READ_ONCE(threads[idx].result);
1674 if (status && !err)
1675 err = status;
1676
1677 kthread_destroy_worker(worker: threads[idx++].worker);
1678 }
1679
1680 if (igt_live_test_end(t: &t))
1681 err = -EIO;
1682 }
1683
1684 kfree(objp: threads);
1685 return err;
1686}
1687
1688static int
1689max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1690{
1691 struct i915_request *rq;
1692 int ret;
1693
1694 /*
1695 * Before execlists, all contexts share the same ringbuffer. With
1696 * execlists, each context/engine has a separate ringbuffer and
1697 * for the purposes of this test, inexhaustible.
1698 *
1699 * For the global ringbuffer though, we have to be very careful
1700 * that we do not wrap while preventing the execution of requests
1701 * with a unsignaled fence.
1702 */
1703 if (HAS_EXECLISTS(ctx->i915))
1704 return INT_MAX;
1705
1706 rq = igt_request_alloc(ctx, engine);
1707 if (IS_ERR(ptr: rq)) {
1708 ret = PTR_ERR(ptr: rq);
1709 } else {
1710 int sz;
1711
1712 ret = rq->ring->size - rq->reserved_space;
1713 i915_request_add(rq);
1714
1715 sz = rq->ring->emit - rq->head;
1716 if (sz < 0)
1717 sz += rq->ring->size;
1718 ret /= sz;
1719 ret /= 2; /* leave half spare, in case of emergency! */
1720 }
1721
1722 return ret;
1723}
1724
1725static int live_breadcrumbs_smoketest(void *arg)
1726{
1727 struct drm_i915_private *i915 = arg;
1728 const unsigned int nengines = num_uabi_engines(i915);
1729 const unsigned int ncpus = /* saturate with nengines * ncpus */
1730 max_t(int, 2, DIV_ROUND_UP(num_online_cpus(), nengines));
1731 unsigned long num_waits, num_fences;
1732 struct intel_engine_cs *engine;
1733 struct smoke_thread *threads;
1734 struct igt_live_test live;
1735 intel_wakeref_t wakeref;
1736 struct smoketest *smoke;
1737 unsigned int n, idx;
1738 struct file *file;
1739 int ret = 0;
1740
1741 /*
1742 * Smoketest our breadcrumb/signal handling for requests across multiple
1743 * threads. A very simple test to only catch the most egregious of bugs.
1744 * See __igt_breadcrumbs_smoketest();
1745 *
1746 * On real hardware this time.
1747 */
1748
1749 wakeref = intel_runtime_pm_get(rpm: &i915->runtime_pm);
1750
1751 file = mock_file(i915);
1752 if (IS_ERR(ptr: file)) {
1753 ret = PTR_ERR(ptr: file);
1754 goto out_rpm;
1755 }
1756
1757 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1758 if (!smoke) {
1759 ret = -ENOMEM;
1760 goto out_file;
1761 }
1762
1763 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1764 if (!threads) {
1765 ret = -ENOMEM;
1766 goto out_smoke;
1767 }
1768
1769 smoke[0].request_alloc = __live_request_alloc;
1770 smoke[0].ncontexts = 64;
1771 smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1772 sizeof(*smoke[0].contexts),
1773 GFP_KERNEL);
1774 if (!smoke[0].contexts) {
1775 ret = -ENOMEM;
1776 goto out_threads;
1777 }
1778
1779 for (n = 0; n < smoke[0].ncontexts; n++) {
1780 smoke[0].contexts[n] = live_context(i915, file);
1781 if (IS_ERR(ptr: smoke[0].contexts[n])) {
1782 ret = PTR_ERR(ptr: smoke[0].contexts[n]);
1783 goto out_contexts;
1784 }
1785 }
1786
1787 ret = igt_live_test_begin(t: &live, i915, func: __func__, name: "");
1788 if (ret)
1789 goto out_contexts;
1790
1791 idx = 0;
1792 for_each_uabi_engine(engine, i915) {
1793 smoke[idx] = smoke[0];
1794 smoke[idx].engine = engine;
1795 smoke[idx].max_batch =
1796 max_batches(ctx: smoke[0].contexts[0], engine);
1797 if (smoke[idx].max_batch < 0) {
1798 ret = smoke[idx].max_batch;
1799 goto out_flush;
1800 }
1801 /* One ring interleaved between requests from all cpus */
1802 smoke[idx].max_batch /= ncpus + 1;
1803 pr_debug("Limiting batches to %d requests on %s\n",
1804 smoke[idx].max_batch, engine->name);
1805
1806 for (n = 0; n < ncpus; n++) {
1807 unsigned int i = idx * ncpus + n;
1808 struct kthread_worker *worker;
1809
1810 worker = kthread_run_worker(0, "igt/%d.%d", idx, n);
1811 if (IS_ERR(ptr: worker)) {
1812 ret = PTR_ERR(ptr: worker);
1813 goto out_flush;
1814 }
1815
1816 threads[i].worker = worker;
1817 threads[i].t = &smoke[idx];
1818
1819 kthread_init_work(&threads[i].work,
1820 __igt_breadcrumbs_smoketest);
1821 kthread_queue_work(worker, work: &threads[i].work);
1822 }
1823
1824 idx++;
1825 }
1826
1827 msleep(msecs: jiffies_to_msecs(j: i915_selftest.timeout_jiffies));
1828
1829out_flush:
1830 idx = 0;
1831 num_waits = 0;
1832 num_fences = 0;
1833 for_each_uabi_engine(engine, i915) {
1834 for (n = 0; n < ncpus; n++) {
1835 unsigned int i = idx * ncpus + n;
1836 int err;
1837
1838 if (!threads[i].worker)
1839 continue;
1840
1841 WRITE_ONCE(threads[i].stop, true);
1842 kthread_flush_work(work: &threads[i].work);
1843 err = READ_ONCE(threads[i].result);
1844 if (err < 0 && !ret)
1845 ret = err;
1846
1847 kthread_destroy_worker(worker: threads[i].worker);
1848 }
1849
1850 num_waits += atomic_long_read(v: &smoke[idx].num_waits);
1851 num_fences += atomic_long_read(v: &smoke[idx].num_fences);
1852 idx++;
1853 }
1854 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1855 num_waits, num_fences, idx, ncpus);
1856
1857 ret = igt_live_test_end(t: &live) ?: ret;
1858out_contexts:
1859 kfree(objp: smoke[0].contexts);
1860out_threads:
1861 kfree(objp: threads);
1862out_smoke:
1863 kfree(objp: smoke);
1864out_file:
1865 fput(file);
1866out_rpm:
1867 intel_runtime_pm_put(rpm: &i915->runtime_pm, wref: wakeref);
1868
1869 return ret;
1870}
1871
1872int i915_request_live_selftests(struct drm_i915_private *i915)
1873{
1874 static const struct i915_subtest tests[] = {
1875 SUBTEST(live_nop_request),
1876 SUBTEST(live_all_engines),
1877 SUBTEST(live_sequential_engines),
1878 SUBTEST(live_parallel_engines),
1879 SUBTEST(live_empty_request),
1880 SUBTEST(live_cancel_request),
1881 SUBTEST(live_breadcrumbs_smoketest),
1882 };
1883
1884 if (intel_gt_is_wedged(gt: to_gt(i915)))
1885 return 0;
1886
1887 return i915_live_subtests(tests, i915);
1888}
1889
1890static int switch_to_kernel_sync(struct intel_context *ce, int err)
1891{
1892 struct i915_request *rq;
1893 struct dma_fence *fence;
1894
1895 rq = intel_engine_create_kernel_request(engine: ce->engine);
1896 if (IS_ERR(ptr: rq))
1897 return PTR_ERR(ptr: rq);
1898
1899 fence = i915_active_fence_get(active: &ce->timeline->last_request);
1900 if (fence) {
1901 i915_request_await_dma_fence(rq, fence);
1902 dma_fence_put(fence);
1903 }
1904
1905 rq = i915_request_get(rq);
1906 i915_request_add(rq);
1907 if (i915_request_wait(rq, flags: 0, HZ / 2) < 0 && !err)
1908 err = -ETIME;
1909 i915_request_put(rq);
1910
1911 while (!err && !intel_engine_is_idle(engine: ce->engine))
1912 intel_engine_flush_submission(engine: ce->engine);
1913
1914 return err;
1915}
1916
1917struct perf_stats {
1918 struct intel_engine_cs *engine;
1919 unsigned long count;
1920 ktime_t time;
1921 ktime_t busy;
1922 u64 runtime;
1923};
1924
1925struct perf_series {
1926 struct drm_i915_private *i915;
1927 unsigned int nengines;
1928 struct intel_context *ce[] __counted_by(nengines);
1929};
1930
1931static int cmp_u32(const void *A, const void *B)
1932{
1933 const u32 *a = A, *b = B;
1934
1935 return *a - *b;
1936}
1937
1938static u32 trifilter(u32 *a)
1939{
1940 u64 sum;
1941
1942#define TF_COUNT 5
1943 sort(base: a, TF_COUNT, size: sizeof(*a), cmp_func: cmp_u32, NULL);
1944
1945 sum = mul_u32_u32(a: a[2], b: 2);
1946 sum += a[1];
1947 sum += a[3];
1948
1949 GEM_BUG_ON(sum > U32_MAX);
1950 return sum;
1951#define TF_BIAS 2
1952}
1953
1954static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles)
1955{
1956 u64 ns = intel_gt_clock_interval_to_ns(gt: engine->gt, count: cycles);
1957
1958 return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS);
1959}
1960
1961static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset)
1962{
1963 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT;
1964 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base)));
1965 *cs++ = offset;
1966 *cs++ = 0;
1967
1968 return cs;
1969}
1970
1971static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value)
1972{
1973 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
1974 *cs++ = offset;
1975 *cs++ = 0;
1976 *cs++ = value;
1977
1978 return cs;
1979}
1980
1981static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset)
1982{
1983 *cs++ = MI_SEMAPHORE_WAIT |
1984 MI_SEMAPHORE_GLOBAL_GTT |
1985 MI_SEMAPHORE_POLL |
1986 mode;
1987 *cs++ = value;
1988 *cs++ = offset;
1989 *cs++ = 0;
1990
1991 return cs;
1992}
1993
1994static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value)
1995{
1996 return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset);
1997}
1998
1999static void semaphore_set(u32 *sema, u32 value)
2000{
2001 WRITE_ONCE(*sema, value);
2002 wmb(); /* flush the update to the cache, and beyond */
2003}
2004
2005static u32 *hwsp_scratch(const struct intel_context *ce)
2006{
2007 return memset32(s: ce->engine->status_page.addr + 1000, v: 0, n: 21);
2008}
2009
2010static u32 hwsp_offset(const struct intel_context *ce, u32 *dw)
2011{
2012 return (i915_ggtt_offset(vma: ce->engine->status_page.vma) +
2013 offset_in_page(dw));
2014}
2015
2016static int measure_semaphore_response(struct intel_context *ce)
2017{
2018 u32 *sema = hwsp_scratch(ce);
2019 const u32 offset = hwsp_offset(ce, dw: sema);
2020 u32 elapsed[TF_COUNT], cycles;
2021 struct i915_request *rq;
2022 u32 *cs;
2023 int err;
2024 int i;
2025
2026 /*
2027 * Measure how many cycles it takes for the HW to detect the change
2028 * in a semaphore value.
2029 *
2030 * A: read CS_TIMESTAMP from CPU
2031 * poke semaphore
2032 * B: read CS_TIMESTAMP on GPU
2033 *
2034 * Semaphore latency: B - A
2035 */
2036
2037 semaphore_set(sema, value: -1);
2038
2039 rq = i915_request_create(ce);
2040 if (IS_ERR(ptr: rq))
2041 return PTR_ERR(ptr: rq);
2042
2043 cs = intel_ring_begin(rq, num_dwords: 4 + 12 * ARRAY_SIZE(elapsed));
2044 if (IS_ERR(ptr: cs)) {
2045 i915_request_add(rq);
2046 err = PTR_ERR(ptr: cs);
2047 goto err;
2048 }
2049
2050 cs = emit_store_dw(cs, offset, value: 0);
2051 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2052 cs = emit_semaphore_poll_until(cs, offset, value: i);
2053 cs = emit_timestamp_store(cs, ce, offset: offset + i * sizeof(u32));
2054 cs = emit_store_dw(cs, offset, value: 0);
2055 }
2056
2057 intel_ring_advance(rq, cs);
2058 i915_request_add(rq);
2059
2060 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
2061 err = -EIO;
2062 goto err;
2063 }
2064
2065 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
2066 preempt_disable();
2067 cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
2068 semaphore_set(sema, value: i);
2069 preempt_enable();
2070
2071 if (wait_for(READ_ONCE(*sema) == 0, 50)) {
2072 err = -EIO;
2073 goto err;
2074 }
2075
2076 elapsed[i - 1] = sema[i] - cycles;
2077 }
2078
	cycles = trifilter(elapsed);
	pr_info("%s: semaphore response %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_idle_dispatch(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for us to submit a request while the
	 * engine is idle, but is resting in our context.
	 *
	 *    A: read CS_TIMESTAMP from CPU
	 *    submit request
	 *    B: read CS_TIMESTAMP on GPU
	 *
	 * Submission latency: B - A
	 */

	for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
		struct i915_request *rq;

		err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
		if (err)
			return err;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

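		/*
		 * Sample CS_TIMESTAMP on the CPU immediately before queuing
		 * the request, with preemption and softirqs disabled so
		 * nothing creeps in between the read and the submission.
		 */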
		preempt_disable();
		local_bh_disable();
		elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
		local_bh_enable();
		preempt_enable();
	}

	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 0; i < ARRAY_SIZE(elapsed); i++)
		elapsed[i] = sema[i] - elapsed[i];

	cycles = trifilter(elapsed);
	pr_info("%s: idle dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_busy_dispatch(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT + 1], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for us to submit a request while the
	 * engine is busy, polling on a semaphore in our context. With
	 * direct submission, this will include the cost of a lite restore.
	 *
	 *    A: read CS_TIMESTAMP from CPU
	 *    submit request
	 *    B: read CS_TIMESTAMP on GPU
	 *
	 * Submission latency: B - A
	 */

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

		if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) {
			err = -EIO;
			goto err;
		}

		preempt_disable();
		local_bh_disable();
		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
		local_bh_enable();
		semaphore_set(sema, i - 1);
		preempt_enable();
	}

	wait_for(READ_ONCE(sema[i - 1]), 500);
	semaphore_set(sema, i - 1);

	for (i = 1; i <= TF_COUNT; i++) {
		GEM_BUG_ON(sema[i] == -1);
		elapsed[i - 1] = sema[i] - elapsed[i];
	}

	cycles = trifilter(elapsed);
	pr_info("%s: busy dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

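/*
 * Plug the engine: submit a kernel-context request that waits on the
 * semaphore with the given mode/value, holding back everything queued
 * behind it until the CPU pokes the semaphore.
 */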
static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value)
{
	const u32 offset =
		i915_ggtt_offset(engine->status_page.vma) +
		offset_in_page(sema);
	struct i915_request *rq;
	u32 *cs;

	rq = i915_request_create(engine->kernel_context);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		return PTR_ERR(cs);
	}

	cs = emit_semaphore_poll(cs, mode, value, offset);

	intel_ring_advance(rq, cs);
	i915_request_add(rq);

	return 0;
}

static int measure_inter_request(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT + 1], cycles;
	struct i915_sw_fence *submit;
	int i, err;

	/*
	 * Measure how long it takes to advance from one request into the
	 * next. Between each request we flush the GPU caches to memory,
	 * update the breadcrumbs, and then invalidate those caches.
	 * We queue up all the requests to be submitted in one batch so
	 * it should be one set of contiguous measurements.
	 *
	 *    A: read CS_TIMESTAMP on GPU
	 *    advance request
	 *    B: read CS_TIMESTAMP on GPU
	 *
	 * Request latency: B - A
	 */

	err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
	if (err)
		return err;

	submit = heap_fence_create(GFP_KERNEL);
	if (!submit) {
		semaphore_set(sema, 1);
		return -ENOMEM;
	}

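	/*
	 * With the engine plugged, queue all the timestamping requests
	 * behind the submit fence so that, once committed, they execute
	 * back-to-back as one contiguous run.
	 */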
	intel_engine_flush_submission(ce->engine);
	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct i915_request *rq;
		u32 *cs;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_submit;
		}

		err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
						       submit,
						       GFP_KERNEL);
		if (err < 0) {
			i915_request_add(rq);
			goto err_submit;
		}

		cs = intel_ring_begin(rq, 4);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err_submit;
		}

		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);
		i915_request_add(rq);
	}
	i915_sw_fence_commit(submit);
	intel_engine_flush_submission(ce->engine);
	heap_fence_put(submit);

	semaphore_set(sema, 1);
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[i + 1] - sema[i];

	cycles = trifilter(elapsed);
	pr_info("%s: inter-request latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err_submit:
	i915_sw_fence_commit(submit);
	heap_fence_put(submit);
	semaphore_set(sema, 1);
err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_context_switch(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	struct i915_request *fence = NULL;
	u32 elapsed[TF_COUNT + 1], cycles;
	int i, j, err;
	u32 *cs;

	/*
	 * Measure how long it takes to advance from one request in one
	 * context to a request in another context. This allows us to
	 * measure how long the context save/restore take, along with all
	 * the inter-context setup we require.
	 *
	 *    A: read CS_TIMESTAMP on GPU
	 *    switch context
	 *    B: read CS_TIMESTAMP on GPU
	 *
	 * Context switch latency: B - A
	 */

	err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0);
	if (err)
		return err;

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct intel_context *arr[] = {
			ce, ce->engine->kernel_context
		};
		u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32);

		for (j = 0; j < ARRAY_SIZE(arr); j++) {
			struct i915_request *rq;

			rq = i915_request_create(arr[j]);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto err_fence;
			}

			if (fence) {
				err = i915_request_await_dma_fence(rq,
								   &fence->fence);
				if (err) {
					i915_request_add(rq);
					goto err_fence;
				}
			}

			cs = intel_ring_begin(rq, 4);
			if (IS_ERR(cs)) {
				i915_request_add(rq);
				err = PTR_ERR(cs);
				goto err_fence;
			}

			cs = emit_timestamp_store(cs, ce, addr);
			addr += sizeof(u32);

			intel_ring_advance(rq, cs);

			i915_request_put(fence);
			fence = i915_request_get(rq);

			i915_request_add(rq);
		}
	}
	i915_request_put(fence);
	intel_engine_flush_submission(ce->engine);

	semaphore_set(sema, 1);
	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1];

	cycles = trifilter(elapsed);
	pr_info("%s: context switch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err_fence:
	i915_request_put(fence);
	semaphore_set(sema, 1);
err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static int measure_preemption(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * We measure two latencies while triggering preemption. The first
	 * latency is how long it takes for us to submit a preempting request.
	 * The second latency is how long it takes for us to return from the
	 * preemption back to the original context.
	 *
	 *    A: read CS_TIMESTAMP from CPU
	 *    submit preemption
	 *    B: read CS_TIMESTAMP on GPU (in preempting context)
	 *    context switch
	 *    C: read CS_TIMESTAMP on GPU (in original context)
	 *
	 * Preemption dispatch latency: B - A
	 * Preemption switch latency: C - B
	 */

	if (!intel_engine_has_preemption(ce->engine))
		return 0;

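	/*
	 * Each iteration queues a semaphore spinner in our context, then a
	 * priority-barrier request on the kernel context that records its
	 * own start (B) and releases the spinner so the original context
	 * can record its resumption (C).
	 */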
	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		u32 addr = offset + 2 * i * sizeof(u32);
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, addr, -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));

		intel_ring_advance(rq, cs);
		i915_request_add(rq);

		if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) {
			err = -EIO;
			goto err;
		}

		rq = i915_request_create(ce->engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, addr);
		cs = emit_store_dw(cs, offset, i);

		intel_ring_advance(rq, cs);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;

		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
	}

	if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) {
		err = -EIO;
		goto err;
	}

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1];

	cycles = trifilter(elapsed);
	pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0];

	cycles = trifilter(elapsed);
	pr_info("%s: preemption switch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

struct signal_cb {
	struct dma_fence_cb base;
	bool seen;
};

static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct signal_cb *s = container_of(cb, typeof(*s), base);

	smp_store_mb(s->seen, true); /* be safe, be strong */
}

static int measure_completion(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for the signal (interrupt) sent by the
	 * GPU to be processed by the CPU.
	 *
	 *    A: read CS_TIMESTAMP on GPU
	 *    signal
	 *    B: read CS_TIMESTAMP from CPU
	 *
	 * Completion latency: B - A
	 */

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct signal_cb cb = { .seen = false };
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

		dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
		i915_request_add(rq);

		intel_engine_flush_submission(ce->engine);
		if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
			err = -EIO;
			goto err;
		}

		preempt_disable();
		semaphore_set(sema, i);
		while (!READ_ONCE(cb.seen))
			cpu_relax();

		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		preempt_enable();
	}

	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
		GEM_BUG_ON(sema[i + 1] == -1);
		elapsed[i] = elapsed[i] - sema[i + 1];
	}

	cycles = trifilter(elapsed);
	pr_info("%s: completion latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static void rps_pin(struct intel_gt *gt)
{
	/* Pin the frequency to max */
	atomic_inc(&gt->rps.num_waiters);
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	mutex_lock(&gt->rps.lock);
	intel_rps_set(&gt->rps, gt->rps.max_freq);
	mutex_unlock(&gt->rps.lock);
}

static void rps_unpin(struct intel_gt *gt)
{
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	atomic_dec(&gt->rps.num_waiters);
}

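/*
 * Run each of the latency probes above on every uabi engine, with the
 * heartbeat disabled, the GPU frequency pinned and CPU C-states disabled
 * so that the measurements are as stable as possible.
 */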
static int perf_request_latency(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	int err = 0;

	if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */
		return 0;

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		st_engine_heartbeat_disable(engine);
		rps_pin(engine->gt);

		if (err == 0)
			err = measure_semaphore_response(ce);
		if (err == 0)
			err = measure_idle_dispatch(ce);
		if (err == 0)
			err = measure_busy_dispatch(ce);
		if (err == 0)
			err = measure_inter_request(ce);
		if (err == 0)
			err = measure_context_switch(ce);
		if (err == 0)
			err = measure_preemption(ce);
		if (err == 0)
			err = measure_completion(ce);

		rps_unpin(engine->gt);
		st_engine_heartbeat_enable(engine);

		intel_context_unpin(ce);
		intel_context_put(ce);
		if (err)
			goto out;
	}

out:
	if (igt_flush_test(i915))
		err = -EIO;

	cpu_latency_qos_remove_request(&qos);
	return err;
}

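/*
 * s_sync0: fully synchronous series - submit a single request at a time,
 * cycling through the engines, and wait for each to complete before
 * issuing the next.
 */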
static int s_sync0(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return err;
}

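/*
 * s_sync1: keep one request in flight - wait on the previous request
 * while the next one has already been submitted.
 */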
static int s_sync1(void *arg)
{
	struct perf_series *ps = arg;
	struct i915_request *prev = NULL;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	return err;
}

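/*
 * s_many: no waiting at all - flood the engines with requests for the
 * duration of the test.
 */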
static int s_many(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return 0;
}

static int perf_series_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		s_sync0,
		s_sync1,
		s_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct perf_stats *stats;
	struct perf_series *ps;
	unsigned int idx;
	int err = 0;

	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		kfree(stats);
		return -ENOMEM;
	}

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	ps->i915 = i915;
	ps->nengines = nengines;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		ps->ce[idx++] = ce;
	}
	GEM_BUG_ON(idx != ps->nengines);

	for (fn = func; *fn && !err; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p =
				memset(&stats[idx], 0, sizeof(stats[idx]));
			struct intel_context *ce = ps->ce[idx];

			p->engine = ps->ce[idx]->engine;
			intel_engine_pm_get(p->engine);

			if (intel_engine_supports_stats(p->engine))
				p->busy = intel_engine_get_busy_time(p->engine,
								     &p->time) + 1;
			else
				p->time = ktime_get();
			p->runtime = -intel_context_get_total_runtime_ns(ce);
		}

		err = (*fn)(ps);
		if (igt_live_test_end(&t))
			err = -EIO;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p = &stats[idx];
			struct intel_context *ce = ps->ce[idx];
			int integer, decimal;
			u64 busy, dt, now;

			if (p->busy)
				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine,
									       &now),
						    p->busy - 1);
			else
				now = ktime_get();
			p->time = ktime_sub(now, p->time);

			err = switch_to_kernel_sync(ce, err);
			p->runtime += intel_context_get_total_runtime_ns(ce);
			intel_engine_pm_put(p->engine);

			busy = 100 * ktime_to_ns(p->busy);
			dt = ktime_to_ns(p->time);
			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, p->engine->name, ce->timeline->seqno,
				integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
		}
	}

out:
	for (idx = 0; idx < nengines; idx++) {
		if (IS_ERR_OR_NULL(ps->ce[idx]))
			break;

		intel_context_unpin(ps->ce[idx]);
		intel_context_put(ps->ce[idx]);
	}
	kfree(ps);

	cpu_latency_qos_remove_request(&qos);
	kfree(stats);
	return err;
}

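/*
 * The parallel variants run one kthread worker per uabi engine, each
 * driving its own context with the same sync0/sync1/many patterns as
 * above so that all engines are exercised concurrently.
 */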
struct p_thread {
	struct perf_stats p;
	struct kthread_worker *worker;
	struct kthread_work work;
	struct intel_engine_cs *engine;
	int result;
};

static void p_sync0(struct kthread_work *work)
{
	struct p_thread *thread = container_of(work, typeof(*thread), work);
	struct perf_stats *p = &thread->p;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
		return;
	}

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		thread->result = err;
		return;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	thread->result = err;
}

static void p_sync1(struct kthread_work *work)
{
	struct p_thread *thread = container_of(work, typeof(*thread), work);
	struct perf_stats *p = &thread->p;
	struct intel_engine_cs *engine = p->engine;
	struct i915_request *prev = NULL;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
		return;
	}

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		thread->result = err;
		return;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (prev && i915_request_wait(prev, 0, HZ) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	thread->result = err;
}

static void p_many(struct kthread_work *work)
{
	struct p_thread *thread = container_of(work, typeof(*thread), work);
	struct perf_stats *p = &thread->p;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;
	bool busy;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		thread->result = PTR_ERR(ce);
		return;
	}

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		thread->result = err;
		return;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	thread->result = err;
}

static int perf_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static void (* const func[])(struct kthread_work *) = {
		p_sync0,
		p_sync1,
		p_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	void (* const *fn)(struct kthread_work *);
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	struct p_thread *engines;
	int err = 0;

	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	cpu_latency_qos_add_request(&qos, 0);

	for (fn = func; *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct kthread_worker *worker;

			intel_engine_pm_get(engine);

			memset(&engines[idx].p, 0, sizeof(engines[idx].p));

			worker = kthread_run_worker(0, "igt:%s",
						    engine->name);
			if (IS_ERR(worker)) {
				err = PTR_ERR(worker);
				intel_engine_pm_put(engine);
				break;
			}
			engines[idx].worker = worker;
			engines[idx].result = 0;
			engines[idx].p.engine = engine;
			engines[idx].engine = engine;

			kthread_init_work(&engines[idx].work, *fn);
			kthread_queue_work(worker, &engines[idx].work);
			idx++;
		}

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (!engines[idx].worker)
				break;

			kthread_flush_work(&engines[idx].work);
			status = READ_ONCE(engines[idx].result);
			if (status && !err)
				err = status;

			intel_engine_pm_put(engine);

			kthread_destroy_worker(engines[idx].worker);
			idx++;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct perf_stats *p = &engines[idx].p;
			u64 busy = 100 * ktime_to_ns(p->busy);
			u64 dt = ktime_to_ns(p->time);
			int integer, decimal;

			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			GEM_BUG_ON(engine != p->engine);
			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, engine->name, p->count, integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
			idx++;
		}
	}

	cpu_latency_qos_remove_request(&qos);
	kfree(engines);
	return err;
}

int i915_request_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_request_latency),
		SUBTEST(perf_series_engines),
		SUBTEST(perf_parallel_engines),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_subtests(tests, i915);
}