/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/slab.h>
#include <drm/drm_print.h>

#include "amdgpu_ring_mux.h"
#include "amdgpu_ring.h"
#include "amdgpu.h"

#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000

static const struct ring_info {
	unsigned int hw_pio;
	const char *ring_name;
} sw_ring_info[] = {
	{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low" },
	{ AMDGPU_RING_PRIO_2, "gfx_high" },
};

static struct kmem_cache *amdgpu_mux_chunk_slab;

static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
								struct amdgpu_ring *ring)
{
	return ring->entry_index < mux->ring_entry_size ?
			&mux->ring_entry[ring->entry_index] : NULL;
}

/* Copy packets from the software ring in the range [s_start, s_end) onto the real ring. */
static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
						  struct amdgpu_ring *ring,
						  u64 s_start, u64 s_end)
{
	u64 start, end;
	struct amdgpu_ring *real_ring = mux->real_ring;

	start = s_start & ring->buf_mask;
	end = s_end & ring->buf_mask;

	if (start == end) {
		DRM_ERROR("no more data copied from sw ring\n");
		return;
	}
	if (start > end) {
		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
					   (ring->ring_size >> 2) - start);
		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
	} else {
		amdgpu_ring_alloc(real_ring, end - start);
		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
	}
}

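/*
 * Replay the chunks recorded on the low priority software ring onto the real
 * ring after a preemption. Only chunks whose fences lie in (last_seq, seq] are
 * copied again; the chunk that is currently being executed also gets its
 * control/CE/DE packets patched before the copy.
 */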
static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_mux_entry *e = NULL;
	struct amdgpu_mux_chunk *chunk;
	uint32_t seq, last_seq;
	int i;

	/* find low priority entries: */
	if (!mux->s_resubmit)
		return;

	for (i = 0; i < mux->num_ring_entries; i++) {
		if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
			e = &mux->ring_entry[i];
			break;
		}
	}

	if (!e) {
		DRM_ERROR("%s no low priority ring found\n", __func__);
		return;
	}

	last_seq = atomic_read(&e->ring->fence_drv.last_seq);
	seq = mux->seqno_to_resubmit;
	if (last_seq < seq) {
		/* resubmit all the fences between (last_seq, seq] */
		list_for_each_entry(chunk, &e->list, entry) {
			if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
				amdgpu_fence_update_start_timestamp(e->ring,
								    chunk->sync_seq,
								    ktime_get());
				if (chunk->sync_seq ==
					le32_to_cpu(*(e->ring->fence_drv.cpu_addr + 2))) {
					if (chunk->cntl_offset <= e->ring->buf_mask)
						amdgpu_ring_patch_cntl(e->ring,
								       chunk->cntl_offset);
					if (chunk->ce_offset <= e->ring->buf_mask)
						amdgpu_ring_patch_ce(e->ring, chunk->ce_offset);
					if (chunk->de_offset <= e->ring->buf_mask)
						amdgpu_ring_patch_de(e->ring, chunk->de_offset);
				}
				amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
								      chunk->start,
								      chunk->end);
				mux->wptr_resubmit = chunk->end;
				amdgpu_ring_commit(mux->real_ring);
			}
		}
	}

	timer_delete(&mux->resubmit_timer);
	mux->s_resubmit = false;
}

static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
{
	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
}

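/*
 * Timer callback used as a fallback for the resubmission. If the mux lock is
 * contended, rearm the timer and try again later.
 */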
static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
{
	struct amdgpu_ring_mux *mux = timer_container_of(mux, t, resubmit_timer);

	if (!spin_trylock(&mux->lock)) {
		amdgpu_ring_mux_schedule_resubmit(mux);
		DRM_ERROR("reschedule resubmit\n");
		return;
	}
	amdgpu_mux_resubmit_chunks(mux);
	spin_unlock(&mux->lock);
}

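/*
 * Set up the multiplexer on top of the real ring: allocate room for
 * @entry_size software ring entries, create the chunk slab cache and set up
 * the resubmit fallback timer.
 */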
int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
			 unsigned int entry_size)
{
	mux->real_ring = ring;
	mux->num_ring_entries = 0;

	mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
	if (!mux->ring_entry)
		return -ENOMEM;

	mux->ring_entry_size = entry_size;
	mux->s_resubmit = false;

	amdgpu_mux_chunk_slab = KMEM_CACHE(amdgpu_mux_chunk, SLAB_HWCACHE_ALIGN);
	if (!amdgpu_mux_chunk_slab) {
		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
		return -ENOMEM;
	}

	spin_lock_init(&mux->lock);
	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);

	return 0;
}

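/* Tear down the multiplexer: free all recorded chunks, the slab cache and the entry table. */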
void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk, *chunk2;
	int i;

	for (i = 0; i < mux->num_ring_entries; i++) {
		e = &mux->ring_entry[i];
		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
			list_del(&chunk->entry);
			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
		}
	}
	kmem_cache_destroy(amdgpu_mux_chunk_slab);
	kfree(mux->ring_entry);
	mux->ring_entry = NULL;
	mux->num_ring_entries = 0;
	mux->ring_entry_size = 0;
}

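/* Register a software ring with the multiplexer and assign its entry index. */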
int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;

	if (mux->num_ring_entries >= mux->ring_entry_size) {
		DRM_ERROR("add sw ring exceeding max entry size\n");
		return -ENOENT;
	}

	e = &mux->ring_entry[mux->num_ring_entries];
	ring->entry_index = mux->num_ring_entries;
	e->ring = ring;

	INIT_LIST_HEAD(&e->list);
	mux->num_ring_entries += 1;
	return 0;
}

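/*
 * Advance the write pointer of a software ring: copy the newly written
 * packets from the software ring onto the real ring and commit them, unless
 * a preemption is pending or the packets were already resubmitted.
 */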
void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
{
	struct amdgpu_mux_entry *e;

	spin_lock(&mux->lock);

	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
		amdgpu_mux_resubmit_chunks(mux);

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry for sw ring\n");
		spin_unlock(&mux->lock);
		return;
	}

	/* Skip this wptr update while a preemption is in progress. */
	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
		spin_unlock(&mux->lock);
		return;
	}

	e->sw_cptr = e->sw_wptr;
	/* Update the copy pointer if the packets were already copied by the resubmit path. */
	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
		e->sw_cptr = mux->wptr_resubmit;
	e->sw_wptr = wptr;
	e->start_ptr_in_hw_ring = mux->real_ring->wptr;

	/* Skip copying packets that have already been resubmitted. */
	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
		amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
		amdgpu_ring_commit(mux->real_ring);
	} else {
		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
	}
	spin_unlock(&mux->lock);
}

u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry for sw ring\n");
		return 0;
	}

	return e->sw_wptr;
}

/**
 * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
 * @mux: the multiplexer the software rings attach to
 * @ring: the software ring of which we calculate the readptr
 *
 * The return value of the readptr is not precise while the other rings could
 * write data onto the real ring buffer. After overwriting on the real ring, we
 * can not decide if our packets have been executed or not read yet. However,
 * this function is only called by tools such as umr to collect the latest
 * packets for hang analysis. We assume the hang happens near our latest
 * submit. Thus we could use the following logic to give the clue:
 * If the readptr is between start and end, then we return the copy pointer
 * plus the distance from start to readptr. If the readptr is before start, we
 * return the copy pointer. Lastly, if the readptr is past end, we return the
 * write pointer.
 */
u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;
	u64 readp, offset, start, end;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("no sw entry found!\n");
		return 0;
	}

	readp = amdgpu_ring_get_rptr(mux->real_ring);

	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
	if (start > end) {
		if (readp <= end)
			readp += mux->real_ring->ring_size >> 2;
		end += mux->real_ring->ring_size >> 2;
	}

	if (start <= readp && readp <= end) {
		offset = readp - start;
		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
	} else if (readp < start) {
		e->sw_rptr = e->sw_cptr;
	} else {
		/* end < readptr */
		e->sw_rptr = e->sw_wptr;
	}

	return e->sw_rptr;
}

u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	return amdgpu_ring_mux_get_rptr(mux, ring);
}

u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	return amdgpu_ring_mux_get_wptr(mux, ring);
}

void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
}

/* Override insert_nop to prevent emitting nops to the software rings */
void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
	WARN_ON(!ring->is_sw_ring);
}

const char *amdgpu_sw_ring_name(int idx)
{
	return idx < ARRAY_SIZE(sw_ring_info) ?
		sw_ring_info[idx].ring_name : NULL;
}

unsigned int amdgpu_sw_ring_priority(int idx)
{
	return idx < ARRAY_SIZE(sw_ring_info) ?
		sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
}

/*
 * Scan the entries: preempt only when a low priority ring has an unsignaled
 * fence and no high priority ring has fences pending.
 */
static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_ring *ring;
	int i, need_preempt;

	need_preempt = 0;
	for (i = 0; i < mux->num_ring_entries; i++) {
		ring = mux->ring_entry[i].ring;
		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
		    amdgpu_fence_count_emitted(ring) > 0)
			return 0;
		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
		    amdgpu_fence_last_unsignaled_time_us(ring) >
		    AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
			need_preempt = 1;
	}
	return need_preempt && !mux->s_resubmit;
}

/* Trigger Mid-Command Buffer Preemption (MCBP) and find out if we need to resubmit. */
static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
{
	int r;

	spin_lock(&mux->lock);
	mux->pending_trailing_fence_signaled = true;
	r = amdgpu_ring_preempt_ib(mux->real_ring);
	spin_unlock(&mux->lock);
	return r;
}

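/*
 * Called at the beginning of an IB submission on a software ring. For high
 * priority rings with MCBP enabled this only decides whether to preempt the
 * real ring; otherwise it records the start of a new chunk for the IB.
 */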
void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
		if (amdgpu_mcbp_scan(mux) > 0)
			amdgpu_mcbp_trigger_preempt(mux);
		return;
	}

	amdgpu_ring_mux_start_ib(mux, ring);
}

void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;

	WARN_ON(!ring->is_sw_ring);
	if (adev->gfx.mcbp && ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
		return;
	amdgpu_ring_mux_end_ib(mux, ring);
}

void amdgpu_sw_ring_ib_mark_offset(struct amdgpu_ring *ring, enum amdgpu_ring_mux_offset_type type)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
	unsigned int offset;

	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
		return;

	offset = ring->wptr & ring->buf_mask;

	amdgpu_ring_mux_ib_mark_offset(mux, ring, offset, type);
}

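/*
 * Record the start of a new chunk for this IB: remember the current software
 * ring wptr and mark the control/CE/DE offsets as unset (buf_mask + 1).
 */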
void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk;

	spin_lock(&mux->lock);
	amdgpu_mux_resubmit_chunks(mux);
	spin_unlock(&mux->lock);

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
	if (!chunk) {
		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
		return;
	}

	chunk->start = ring->wptr;
	/* the initial values are used to check whether the ib submission has set them */
	chunk->cntl_offset = ring->buf_mask + 1;
	chunk->de_offset = ring->buf_mask + 1;
	chunk->ce_offset = ring->buf_mask + 1;
	list_add_tail(&chunk->entry, &e->list);
}

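/* Drop the chunks whose fences have already signaled on this ring. */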
static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	uint32_t last_seq = 0;
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk, *tmp;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	last_seq = atomic_read(&ring->fence_drv.last_seq);

	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
		if (chunk->sync_seq <= last_seq) {
			list_del(&chunk->entry);
			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
		}
	}
}

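/* Record the control/CE/DE packet offset of the current chunk, depending on @type. */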
void amdgpu_ring_mux_ib_mark_offset(struct amdgpu_ring_mux *mux,
				    struct amdgpu_ring *ring, u64 offset,
				    enum amdgpu_ring_mux_offset_type type)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
	if (!chunk) {
		DRM_ERROR("cannot find chunk!\n");
		return;
	}

	switch (type) {
	case AMDGPU_MUX_OFFSET_TYPE_CONTROL:
		chunk->cntl_offset = offset;
		break;
	case AMDGPU_MUX_OFFSET_TYPE_DE:
		chunk->de_offset = offset;
		break;
	case AMDGPU_MUX_OFFSET_TYPE_CE:
		chunk->ce_offset = offset;
		break;
	default:
		DRM_ERROR("invalid type (%d)\n", type);
		break;
	}
}

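/*
 * Close the current chunk at the software ring wptr, remember the fence
 * sequence it synchronizes with and drop chunks that have already signaled.
 */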
void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_mux_chunk *chunk;

	e = amdgpu_ring_mux_sw_entry(mux, ring);
	if (!e) {
		DRM_ERROR("cannot find entry!\n");
		return;
	}

	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
	if (!chunk) {
		DRM_ERROR("cannot find chunk!\n");
		return;
	}

	chunk->end = ring->wptr;
	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);

	scan_and_remove_signaled_chunk(mux, ring);
}

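/*
 * Called from the trailing fence interrupt after a preemption: process the
 * fences of the low priority ring and, if work is still outstanding, schedule
 * a resubmission of its remaining chunks.
 */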
bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
{
	struct amdgpu_mux_entry *e;
	struct amdgpu_ring *ring = NULL;
	int i;

	if (!mux->pending_trailing_fence_signaled)
		return false;

	if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
		return false;

	for (i = 0; i < mux->num_ring_entries; i++) {
		e = &mux->ring_entry[i];
		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
			ring = e->ring;
			break;
		}
	}

	if (!ring) {
		DRM_ERROR("cannot find low priority ring\n");
		return false;
	}

	amdgpu_fence_process(ring);
	if (amdgpu_fence_count_emitted(ring) > 0) {
		mux->s_resubmit = true;
		mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
		amdgpu_ring_mux_schedule_resubmit(mux);
	}

	mux->pending_trailing_fence_signaled = false;
	return true;
}