// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)

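/*
 * inc_wptr() - advance the runlist IB write pointer.
 *
 * @wptr counts dwords, while @increment_bytes and @buffer_size_bytes are in
 * bytes, hence the sizeof(uint32_t) conversion. The WARN only reports an
 * overflow of the runlist IB; the pointer is advanced regardless.
 */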
static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}

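/*
 * pm_calc_rlib_size() - compute the allocation size of the runlist IB.
 *
 * The size is one map-process packet per process plus one map-queues packet
 * per active queue. *over_subscription is filled with a bitmask of
 * OVER_SUBSCRIPTION_* reasons; if any bit is set, room for one extra runlist
 * packet is reserved so the runlist can be chained.
 */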
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				int *over_subscription,
				int xnack_conflict)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

	/* check if there is over subscription
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	*over_subscription = 0;

	if (node->max_proc_per_quantum > 1)
		max_proc_per_quantum = node->max_proc_per_quantum;

	if (process_count > max_proc_per_quantum)
		*over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
	if (compute_queue_count > get_cp_queues_num(pm->dqm))
		*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
	if (gws_queue_count > 1)
		*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
	if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
		*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;

	if (*over_subscription)
		dev_dbg(dev, "Over subscribed runlist\n");

	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}

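/*
 * pm_allocate_runlist_ib() - allocate and zero a runlist IB from the GTT
 * sub-allocator, returning both its CPU and GPU addresses. Fails with
 * -EINVAL if a runlist IB is already allocated (pm->allocated).
 */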
static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				int *is_over_subscription,
				int xnack_conflict)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
			  xnack_conflict);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);

	if (retval) {
		dev_err(dev, "Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}

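/*
 * pm_create_runlist_ib() - build the runlist IB for all processes in @queues.
 *
 * Each process gets a map-process packet followed by map-queues packets for
 * its active kernel and user queues. When processes with different XNACK
 * modes coexist and the device sets AMDGPU_GMC_XNACK_FLAG_CHAIN, the list is
 * walked twice (via the build_runlist_ib label) so that processes sharing an
 * XNACK mode are grouped together. If the runlist is over-subscribed, a
 * chaining runlist packet pointing back at the start of the IB is appended,
 * letting the HWS loop over the runlist.
 */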
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	int is_over_subscription;
	int xnack_enabled = -1;
	bool xnack_conflict = false;

	rl_wptr = retval = processes_mapped = 0;

	/* Check if processes set different xnack modes */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		if (xnack_enabled < 0)
			/* First process */
			xnack_enabled = qpd->pqm->process->xnack_enabled;
		else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
			/* Found a process with a different xnack mode */
			xnack_conflict = true;
			break;
		}
	}

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription,
					xnack_conflict);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

build_runlist_ib:
	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* group processes with the same xnack mode together */
		if (qpd->pqm->process->xnack_enabled != xnack_enabled)
			continue;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			dev_dbg(dev, "Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
			 alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     kq->queue,
						     qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     q,
						     qpd->is_debug);

			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}
	}
	if (xnack_conflict) {
		/* pick up processes with the other xnack mode */
		xnack_enabled = !xnack_enabled;
		xnack_conflict = false;
		goto build_runlist_ib;
	}

	dev_dbg(dev, "Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
				 is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
				 " too many processes" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
				 " too many queues" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
				 " multiple processes using cooperative launch" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
				 " xnack on/off processes mixed on gfx9" : "");

		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					  *rl_gpu_addr,
					  alloc_size_bytes / sizeof(uint32_t),
					  true);
	}
	pm->is_over_subscription = !!is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}

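/*
 * pm_init() - select the per-ASIC packet writer functions and create the
 * HIQ kernel queue used to submit scheduler packets to the HWS.
 */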
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

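/* pm_uninit() - tear down the mutex and HIQ created by pm_init() */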
void pm_uninit(struct packet_manager *pm)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue);
	pm->priv_queue = NULL;
}

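/*
 * pm_send_set_resources() - write a set-resources packet to the HIQ to tell
 * the HWS which hardware resources (e.g. queue and VMID masks) it may use.
 * The packet is rolled back if the per-ASIC writer fails.
 */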
int pm_send_set_resources(struct packet_manager *pm,
			  struct scheduling_resources *res)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t),
				 (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}

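/*
 * pm_send_runlist() - build a runlist IB for @dqm_queues and submit a
 * runlist packet pointing at it through the HIQ. On any failure the IB is
 * released again via pm_release_ib().
 */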
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
				      &rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					  packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
				  rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	retval = kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}

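/*
 * pm_send_query_status() - submit a query-status packet that has the HWS
 * write @fence_value to @fence_address once preceding packets have been
 * processed, so the caller can wait on that fence location.
 */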
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			 uint64_t fence_value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
 * by writing to CP_IQ_WAIT_TIME2 registers.
 *
 * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition
 * @value: Depends on the cmd. This parameter is unused for
 *	KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
 *	KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds the value to be set
 *
 */
int pm_config_dequeue_wait_counts(struct packet_manager *pm,
				  enum kfd_config_dequeue_wait_counts_cmd cmd,
				  uint32_t value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval = 0;
	uint32_t *buffer, size;

	if (!pm->pmf->config_dequeue_wait_counts ||
	    !pm->pmf->config_dequeue_wait_counts_size)
		return 0;

	if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
	    KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
		return 0;

	size = pm->pmf->config_dequeue_wait_counts_size;

	mutex_lock(&pm->lock);

	if (size) {
		kq_acquire_packet_buffer(pm->priv_queue,
					 size / sizeof(uint32_t),
					 (unsigned int **)&buffer);

		if (!buffer) {
			dev_err(dev,
				"Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}

		retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
							     cmd, value);
		if (!retval) {
			retval = kq_submit_packet(pm->priv_queue);

			/* If default value is modified, cache that in dqm->wait_times */
			if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
				update_dqm_wait_times(pm->dqm);
		} else {
			kq_rollback_packet(pm->priv_queue);
		}
	}
out:
	mutex_unlock(&pm->lock);
	return retval;
}

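/*
 * pm_send_unmap_queue() - submit an unmap-queues packet asking the HWS to
 * preempt (or, if @reset is true, reset) the queues selected by @filter and
 * @filter_param.
 */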
int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

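/* pm_release_ib() - free the runlist IB, if one is currently allocated */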
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

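/* pm_debugfs_runlist() - hex-dump the active runlist IB to a debugfs file */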
int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, "  No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, "  ", DUMP_PREFIX_OFFSET, 32, 4,
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}

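/*
 * pm_debugfs_hang_hws() - deliberately hang the HWS for testing by
 * submitting a garbage packet (0x55 filler) of query-status size to the HIQ.
 */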
int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int r = 0;

	if (!pm->priv_queue)
		return -EAGAIN;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
		 buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}


#endif
