// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include "kfd_device_queue_manager.h"
#include "kfd_priv.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"

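/*
 * Look up a process queue node by queue ID. Matches either a user
 * queue (pqn->q) or a kernel queue (pqn->kq); returns NULL if the ID
 * is unknown to this process.
 */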
static inline struct process_queue_node *get_queue_by_qid(
			struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if ((pqn->q && pqn->q->properties.queue_id == qid) ||
		    (pqn->kq && pqn->kq->queue->properties.queue_id == qid))
			return pqn;
	}

	return NULL;
}

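/*
 * Reserve a specific queue slot. Used on the restore path (e.g. CRIU),
 * where the queue ID must match the one recorded at checkpoint time.
 */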
static int assign_queue_slot_by_qid(struct process_queue_manager *pqm,
				    unsigned int qid)
{
	if (qid >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)
		return -EINVAL;

	if (__test_and_set_bit(qid, pqm->queue_slot_bitmap)) {
		pr_err("Cannot create new queue because requested qid(%u) is in use\n", qid);
		return -ENOSPC;
	}

	return 0;
}

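/* Find and claim the lowest free slot in the per-process queue bitmap. */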
static int find_available_queue_slot(struct process_queue_manager *pqm,
					unsigned int *qid)
{
	unsigned long found;

	found = find_first_zero_bit(pqm->queue_slot_bitmap,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	pr_debug("The new slot id %lu\n", found);

	if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
		pr_info("Cannot open more queues for process with pid %d\n",
			pqm->process->lead_thread->pid);
		return -ENOMEM;
	}

	set_bit(found, pqm->queue_slot_bitmap);
	*qid = found;

	return 0;
}

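/*
 * Remove all of the process's queues from one device. When MES is in
 * use and a process context was actually set up, also flush the shader
 * debugger state; already_dequeued makes repeated calls a no-op.
 */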
void kfd_process_dequeue_from_device(struct kfd_process_device *pdd)
{
	struct kfd_node *dev = pdd->dev;

	if (pdd->already_dequeued)
		return;
	/* The MES context flush needs to filter out the case in which the
	 * KFD process is created without setting up the MES context and
	 * queue for creating a compute queue.
	 */
	dev->dqm->ops.process_termination(dev->dqm, &pdd->qpd);
	if (dev->kfd->shared_resources.enable_mes && !!pdd->proc_ctx_gpu_addr &&
	    down_read_trylock(&dev->adev->reset_domain->sem)) {
		amdgpu_mes_flush_shader_debugger(dev->adev,
						 pdd->proc_ctx_gpu_addr);
		up_read(&dev->adev->reset_domain->sem);
	}
	pdd->already_dequeued = true;
}

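/*
 * Attach (gws != NULL) or detach (gws == NULL) the device's GWS
 * resource for the queue identified by qid, then push the change into
 * the queue's MQD via the device queue manager.
 */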
int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
			void *gws)
{
	struct mqd_update_info minfo = {0};
	struct kfd_node *dev = NULL;
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct kgd_mem *mem = NULL;
	int ret;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -EINVAL;
	}

	/* Only one queue per process can have GWS assigned */
	if (gws && pdd->qpd.num_gws)
		return -EBUSY;

	if (!gws && pdd->qpd.num_gws == 0)
		return -EINVAL;

	if ((KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 3) &&
	     KFD_GC_VERSION(dev) != IP_VERSION(9, 4, 4) &&
	     KFD_GC_VERSION(dev) != IP_VERSION(9, 5, 0)) &&
	    !dev->kfd->shared_resources.enable_mes) {
		if (gws)
			ret = amdgpu_amdkfd_add_gws_to_process(pdd->process->kgd_process_info,
				gws, &mem);
		else
			ret = amdgpu_amdkfd_remove_gws_from_process(pdd->process->kgd_process_info,
				pqn->q->gws);
		if (unlikely(ret))
			return ret;
		pqn->q->gws = mem;
	} else {
		/*
		 * Intentionally set GWS to a non-NULL value
		 * for devices that do not use GWS for global wave
		 * synchronization but require the formality
		 * of setting GWS for cooperative groups.
		 */
		pqn->q->gws = gws ? ERR_PTR(-ENOMEM) : NULL;
	}

	pdd->qpd.num_gws = gws ? dev->adev->gds.gws_size : 0;
	minfo.update_flag = gws ? UPDATE_FLAG_IS_GWS : 0;

	return pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						     pqn->q, &minfo);
}

void kfd_process_dequeue_from_all_devices(struct kfd_process *p)
{
	int i;

	for (i = 0; i < p->n_pdds; i++)
		kfd_process_dequeue_from_device(p->pdds[i]);
}

int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p)
{
	INIT_LIST_HEAD(&pqm->queues);
	pqm->queue_slot_bitmap = bitmap_zalloc(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS,
					       GFP_KERNEL);
	if (!pqm->queue_slot_bitmap)
		return -ENOMEM;
	pqm->process = p;

	return 0;
}

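/*
 * Release a queue's device-side resources: its GWS allocation and, for
 * MES, the gang context BO and the GART mapping of the wptr BO.
 */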
static void pqm_clean_queue_resource(struct process_queue_manager *pqm,
				     struct process_queue_node *pqn)
{
	struct kfd_node *dev;
	struct kfd_process_device *pdd;

	dev = pqn->q->device;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return;
	}

	if (pqn->q->gws) {
		if (KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 3) &&
		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 4, 4) &&
		    KFD_GC_VERSION(pqn->q->device) != IP_VERSION(9, 5, 0) &&
		    !dev->kfd->shared_resources.enable_mes)
			amdgpu_amdkfd_remove_gws_from_process(
				pqm->process->kgd_process_info, pqn->q->gws);
		pdd->qpd.num_gws = 0;
	}

	if (dev->kfd->shared_resources.enable_mes) {
		amdgpu_amdkfd_free_gtt_mem(dev->adev, &pqn->q->gang_ctx_bo);
		amdgpu_amdkfd_free_gtt_mem(dev->adev, (void **)&pqn->q->wptr_bo_gart);
	}
}

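/*
 * Tear down every queue still owned by the process and free the queue
 * slot bitmap when the process queue manager itself is destroyed.
 */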
void pqm_uninit(struct process_queue_manager *pqm)
{
	struct process_queue_node *pqn, *next;

	list_for_each_entry_safe(pqn, next, &pqm->queues, process_queue_list) {
		if (pqn->q) {
			struct kfd_process_device *pdd = kfd_get_process_device_data(pqn->q->device,
										     pqm->process);
			if (pdd) {
				kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
				kfd_queue_release_buffers(pdd, &pqn->q->properties);
			} else {
				WARN_ON(!pdd);
			}
			pqm_clean_queue_resource(pqm, pqn);
		}

		kfd_procfs_del_queue(pqn->q);
		uninit_queue(pqn->q);
		list_del(&pqn->process_queue_list);
		kfree(pqn);
	}

	bitmap_free(pqm->queue_slot_bitmap);
	pqm->queue_slot_bitmap = NULL;
}

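/*
 * Common initialization for user mode queues: fill in the properties
 * owned by the kernel, allocate the queue structure and, for MES, the
 * gang context BO plus a GART mapping of the user wptr BO.
 */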
static int init_user_queue(struct process_queue_manager *pqm,
				struct kfd_node *dev, struct queue **q,
				struct queue_properties *q_properties,
				unsigned int qid)
{
	int retval;

	/* Doorbell initialized in user space */
	q_properties->doorbell_ptr = NULL;
	q_properties->exception_status = KFD_EC_MASK(EC_QUEUE_NEW);

	/* let DQM handle it */
	q_properties->vmid = 0;
	q_properties->queue_id = qid;

	retval = init_queue(q, q_properties);
	if (retval != 0)
		return retval;

	(*q)->device = dev;
	(*q)->process = pqm->process;

	if (dev->kfd->shared_resources.enable_mes) {
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						AMDGPU_MES_GANG_CTX_SIZE,
						&(*q)->gang_ctx_bo,
						&(*q)->gang_ctx_gpu_addr,
						&(*q)->gang_ctx_cpu_ptr,
						false);
		if (retval) {
			pr_err("failed to allocate gang context bo\n");
			goto cleanup;
		}
		memset((*q)->gang_ctx_cpu_ptr, 0, AMDGPU_MES_GANG_CTX_SIZE);

		/* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work
		 * on unmapped queues for usermode queue oversubscription (no aggregated doorbell)
		 */
		if (dev->adev != amdgpu_ttm_adev(q_properties->wptr_bo->tbo.bdev)) {
			pr_err("Queue memory allocated to wrong device\n");
			retval = -EINVAL;
			goto free_gang_ctx_bo;
		}

		retval = amdgpu_amdkfd_map_gtt_bo_to_gart(q_properties->wptr_bo,
							  &(*q)->wptr_bo_gart);
		if (retval) {
			pr_err("Failed to map wptr bo to GART\n");
			goto free_gang_ctx_bo;
		}
	}

	pr_debug("PQM After init queue");
	return 0;

free_gang_ctx_bo:
	amdgpu_amdkfd_free_gtt_mem(dev->adev, &(*q)->gang_ctx_bo);
cleanup:
	uninit_queue(*q);
	*q = NULL;
	return retval;
}

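/*
 * Create a user compute/SDMA queue or a kernel DIQ for this process on
 * the given device. On restore (q_data != NULL) the recorded queue ID,
 * MQD and control stack are reused; otherwise a free slot is picked.
 * On success, returns the new queue ID through *qid and, if requested,
 * the doorbell offset within the process doorbell page in bytes.
 */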
int pqm_create_queue(struct process_queue_manager *pqm,
			    struct kfd_node *dev,
			    struct queue_properties *properties,
			    unsigned int *qid,
			    const struct kfd_criu_queue_priv_data *q_data,
			    const void *restore_mqd,
			    const void *restore_ctl_stack,
			    uint32_t *p_doorbell_offset_in_process)
{
	int retval;
	struct kfd_process_device *pdd;
	struct queue *q;
	struct process_queue_node *pqn;
	struct kernel_queue *kq;
	enum kfd_queue_type type = properties->type;
	unsigned int max_queues = 127; /* HWS limit */

	/*
	 * On GFX 9.4.3/9.4.4/9.5.0, increase the number of queues that
	 * can be created to 255. There is no HWS limit on these GFX versions.
	 */
	if (KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 3) ||
	    KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 4) ||
	    KFD_GC_VERSION(dev) == IP_VERSION(9, 5, 0))
		max_queues = 255;

	q = NULL;
	kq = NULL;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	/*
	 * For a debug process, verify that it is within the static queues
	 * limit; currently the limit is set to half of the total available
	 * HQD slots. If we are just about to create a DIQ, the is_debug flag
	 * is not set yet, hence we also check the type.
	 */
	if ((pdd->qpd.is_debug) || (type == KFD_QUEUE_TYPE_DIQ))
		max_queues = dev->kfd->device_info.max_no_of_hqd/2;

	if (pdd->qpd.queue_count >= max_queues)
		return -ENOSPC;

	if (q_data) {
		retval = assign_queue_slot_by_qid(pqm, q_data->q_id);
		*qid = q_data->q_id;
	} else
		retval = find_available_queue_slot(pqm, qid);

	if (retval != 0)
		return retval;

	/* Register process if this is the first queue */
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.register_process(dev->dqm, &pdd->qpd);

	/* Allocate proc_ctx_bo only if MES is enabled and this is the first queue */
	if (!pdd->proc_ctx_cpu_ptr && dev->kfd->shared_resources.enable_mes) {
		retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
						     AMDGPU_MES_PROC_CTX_SIZE,
						     &pdd->proc_ctx_bo,
						     &pdd->proc_ctx_gpu_addr,
						     &pdd->proc_ctx_cpu_ptr,
						     false);
		if (retval) {
			dev_err(dev->adev->dev, "failed to allocate process context bo\n");
			return retval;
		}
		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
	}

	pqn = kzalloc(sizeof(*pqn), GFP_KERNEL);
	if (!pqn) {
		retval = -ENOMEM;
		goto err_allocate_pqn;
	}

	switch (type) {
	case KFD_QUEUE_TYPE_SDMA:
	case KFD_QUEUE_TYPE_SDMA_XGMI:
	case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID:
		/* SDMA queues are always allocated statically no matter
		 * which scheduler mode is used. We also do not need to
		 * check whether an SDMA queue can be allocated here, because
		 * allocate_sdma_queue() in create_queue() has the
		 * corresponding check logic.
		 */
		retval = init_user_queue(pqm, dev, &q, properties, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;

	case KFD_QUEUE_TYPE_COMPUTE:
		/* check if there is over subscription */
		if ((dev->dqm->sched_policy ==
		     KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
		    ((dev->dqm->processes_count >= dev->vm_info.vmid_num_kfd) ||
		     (dev->dqm->active_queue_count >= get_cp_queues_num(dev->dqm)))) {
			pr_debug("Over-subscription is not allowed when amdkfd.sched_policy == 1\n");
			retval = -EPERM;
			goto err_create_queue;
		}

		retval = init_user_queue(pqm, dev, &q, properties, *qid);
		if (retval != 0)
			goto err_create_queue;
		pqn->q = q;
		pqn->kq = NULL;
		retval = dev->dqm->ops.create_queue(dev->dqm, q, &pdd->qpd, q_data,
						    restore_mqd, restore_ctl_stack);
		print_queue(q);
		break;
	case KFD_QUEUE_TYPE_DIQ:
		kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_DIQ);
		if (!kq) {
			retval = -ENOMEM;
			goto err_create_queue;
		}
		kq->queue->properties.queue_id = *qid;
		pqn->kq = kq;
		pqn->q = NULL;
		retval = kfd_process_drain_interrupts(pdd);
		if (retval)
			break;

		retval = dev->dqm->ops.create_kernel_queue(dev->dqm,
							   kq, &pdd->qpd);
		break;
	default:
		WARN(1, "Invalid queue type %d", type);
		retval = -EINVAL;
	}

	if (retval != 0) {
		if ((type == KFD_QUEUE_TYPE_SDMA ||
		     type == KFD_QUEUE_TYPE_SDMA_XGMI ||
		     type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) &&
		    retval == -ENOMEM)
			pr_warn("process pid %d DQM create queue type %d failed. ret %d\n",
				pqm->process->lead_thread->pid, type, retval);
		else
			pr_err("process pid %d DQM create queue type %d failed. ret %d\n",
			       pqm->process->lead_thread->pid, type, retval);
		goto err_create_queue;
	}

	if (q && p_doorbell_offset_in_process) {
		/* Return the doorbell offset within the doorbell page
		 * to the caller so it can be passed up to user mode
		 * (in bytes).
		 * relative doorbell index = Absolute doorbell index -
		 * absolute index of first doorbell in the page.
		 */
		uint32_t first_db_index = amdgpu_doorbell_index_on_bar(pdd->dev->adev,
								       pdd->qpd.proc_doorbells,
								       0,
								       pdd->dev->kfd->device_info.doorbell_size);

		*p_doorbell_offset_in_process = (q->properties.doorbell_off
						- first_db_index) * sizeof(uint32_t);
	}

	pr_debug("PQM After DQM create queue\n");

	list_add(&pqn->process_queue_list, &pqm->queues);

	if (q) {
		pr_debug("PQM done creating queue\n");
		kfd_procfs_add_queue(q);
		print_queue_properties(&q->properties);
	}

	return retval;

err_create_queue:
	uninit_queue(q);
	if (kq)
		kernel_queue_uninit(kq);
	kfree(pqn);
err_allocate_pqn:
	/* If the queues list is empty, unregister the process from the device */
	clear_bit(*qid, pqm->queue_slot_bitmap);
	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dev->dqm->ops.unregister_process(dev->dqm, &pdd->qpd);
	return retval;
}

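/*
 * Destroy the queue identified by qid: remove it from the device queue
 * manager, release its buffers and device resources, and free its slot.
 * Unregisters the process from the DQM when its last queue is gone.
 */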
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid)
{
	struct process_queue_node *pqn;
	struct kfd_process_device *pdd;
	struct device_queue_manager *dqm;
	struct kfd_node *dev;
	int retval;

	dqm = NULL;

	retval = 0;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_err("Queue id does not match any known queue\n");
		return -EINVAL;
	}

	dev = NULL;
	if (pqn->kq)
		dev = pqn->kq->dev;
	if (pqn->q)
		dev = pqn->q->device;
	if (WARN_ON(!dev))
		return -ENODEV;

	pdd = kfd_get_process_device_data(dev, pqm->process);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		return -1;
	}

	if (pqn->kq) {
		/* destroy kernel queue (DIQ) */
		dqm = pqn->kq->dev->dqm;
		dqm->ops.destroy_kernel_queue(dqm, pqn->kq, &pdd->qpd);
		kernel_queue_uninit(pqn->kq);
	}

	if (pqn->q) {
		retval = kfd_queue_unref_bo_vas(pdd, &pqn->q->properties);
		if (retval)
			goto err_destroy_queue;

		dqm = pqn->q->device->dqm;
		retval = dqm->ops.destroy_queue(dqm, &pdd->qpd, pqn->q);
		if (retval) {
			pr_err("Pasid 0x%x destroy queue %d failed, ret %d\n",
			       pdd->pasid,
			       pqn->q->properties.queue_id, retval);
			if (retval != -ETIME && retval != -EIO)
				goto err_destroy_queue;
		}
		kfd_procfs_del_queue(pqn->q);
		kfd_queue_release_buffers(pdd, &pqn->q->properties);
		pqm_clean_queue_resource(pqm, pqn);
		uninit_queue(pqn->q);
	}

	list_del(&pqn->process_queue_list);
	kfree(pqn);
	clear_bit(qid, pqm->queue_slot_bitmap);

	if (list_empty(&pdd->qpd.queues_list) &&
	    list_empty(&pdd->qpd.priv_queue_list))
		dqm->ops.unregister_process(dqm, &pdd->qpd);

err_destroy_queue:
	return retval;
}

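/*
 * Apply a new ring address/size, percentage, priority and target XCC
 * to a queue, then update its MQD. An update with a NULL ring address
 * disables the queue.
 */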
int pqm_update_queue_properties(struct process_queue_manager *pqm,
				unsigned int qid, struct queue_properties *p)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn || !pqn->q) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	/*
	 * Update with NULL ring address is used to disable the queue
	 */
	if (p->queue_address && p->queue_size) {
		struct kfd_process_device *pdd;
		struct amdgpu_vm *vm;
		struct queue *q = pqn->q;
		int err;

		pdd = kfd_get_process_device_data(q->device, q->process);
		if (!pdd)
			return -ENODEV;
		vm = drm_priv_to_vm(pdd->drm_priv);
		err = amdgpu_bo_reserve(vm->root.bo, false);
		if (err)
			return err;

		if (kfd_queue_buffer_get(vm, (void *)p->queue_address, &p->ring_bo,
					 p->queue_size)) {
			pr_debug("ring buf 0x%llx size 0x%llx not mapped on GPU\n",
				 p->queue_address, p->queue_size);
			return -EFAULT;
		}

		kfd_queue_unref_bo_va(vm, &pqn->q->properties.ring_bo);
		kfd_queue_buffer_put(&pqn->q->properties.ring_bo);
		amdgpu_bo_unreserve(vm->root.bo);

		pqn->q->properties.ring_bo = p->ring_bo;
	}

	pqn->q->properties.queue_address = p->queue_address;
	pqn->q->properties.queue_size = p->queue_size;
	pqn->q->properties.queue_percent = p->queue_percent;
	pqn->q->properties.priority = p->priority;
	pqn->q->properties.pm4_target_xcc = p->pm4_target_xcc;

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						       pqn->q, NULL);
	if (retval != 0)
		return retval;

	return 0;
}

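/*
 * Validate and apply an MQD update (currently the CU mask). A user CU
 * mask is rejected while the debugger owns the mask, and on GFX10+ the
 * mask must enable CUs in adjacent pairs because they share a WGP.
 */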
int pqm_update_mqd(struct process_queue_manager *pqm,
				unsigned int qid, struct mqd_update_info *minfo)
{
	int retval;
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("No queue %d exists for update operation\n", qid);
		return -EFAULT;
	}

	/* CUs are masked for debugger requirements so deny user mask */
	if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
		return -EBUSY;

	/* ASICs that have WGPs must enforce pairwise enabled mask checks. */
	if (minfo && minfo->cu_mask.ptr &&
	    KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10, 0, 0)) {
		int i;

		for (i = 0; i < minfo->cu_mask.count; i += 2) {
			uint32_t cu_pair = (minfo->cu_mask.ptr[i / 32] >> (i % 32)) & 0x3;

			if (cu_pair && cu_pair != 0x3) {
				pr_debug("CUs must be adjacent pairwise enabled.\n");
				return -EINVAL;
			}
		}
	}

	retval = pqn->q->device->dqm->ops.update_queue(pqn->q->device->dqm,
						       pqn->q, minfo);
	if (retval != 0)
		return retval;

	if (minfo && minfo->cu_mask.ptr)
		pqn->q->properties.is_user_cu_masked = true;

	return 0;
}

struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
					unsigned int qid)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	return pqn ? pqn->q : NULL;
}

int pqm_get_wave_state(struct process_queue_manager *pqm,
		       unsigned int qid,
		       void __user *ctl_stack,
		       u32 *ctl_stack_used_size,
		       u32 *save_area_used_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n",
			 qid);
		return -EFAULT;
	}

	return pqn->q->device->dqm->ops.get_wave_state(pqn->q->device->dqm,
						       pqn->q,
						       ctl_stack,
						       ctl_stack_used_size,
						       save_area_used_size);
}

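/*
 * Copy a snapshot entry for each user queue of the process into the
 * user buffer, clearing exceptions per exception_clear_mask, and report
 * the total queue count so the caller can size a larger buffer if needed.
 */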
int pqm_get_queue_snapshot(struct process_queue_manager *pqm,
			   uint64_t exception_clear_mask,
			   void __user *buf,
			   int *num_qss_entries,
			   uint32_t *entry_size)
{
	struct process_queue_node *pqn;
	struct kfd_queue_snapshot_entry src;
	uint32_t tmp_entry_size = *entry_size, tmp_qss_entries = *num_qss_entries;
	int r = 0;

	*num_qss_entries = 0;
	if (!(*entry_size))
		return -EINVAL;

	*entry_size = min_t(size_t, *entry_size, sizeof(struct kfd_queue_snapshot_entry));
	mutex_lock(&pqm->process->event_mutex);

	memset(&src, 0, sizeof(src));

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (!pqn->q)
			continue;

		if (*num_qss_entries < tmp_qss_entries) {
			set_queue_snapshot_entry(pqn->q, exception_clear_mask, &src);

			if (copy_to_user(buf, &src, *entry_size)) {
				r = -EFAULT;
				break;
			}
			buf += tmp_entry_size;
		}
		*num_qss_entries += 1;
	}

	mutex_unlock(&pqm->process->event_mutex);
	return r;
}

static int get_queue_data_sizes(struct kfd_process_device *pdd,
				struct queue *q,
				uint32_t *mqd_size,
				uint32_t *ctl_stack_size)
{
	int ret;

	ret = pqm_get_queue_checkpoint_info(&pdd->process->pqm,
					    q->properties.queue_id,
					    mqd_size,
					    ctl_stack_size);
	if (ret)
		pr_err("Failed to get queue dump info (%d)\n", ret);

	return ret;
}

int kfd_process_get_queue_info(struct kfd_process *p,
			       uint32_t *num_queues,
			       uint64_t *priv_data_sizes)
{
	uint32_t extra_data_sizes = 0;
	struct queue *q;
	int i;
	int ret;

	*num_queues = 0;

	/* Run over all PDDs of the process */
	for (i = 0; i < p->n_pdds; i++) {
		struct kfd_process_device *pdd = p->pdds[i];

		list_for_each_entry(q, &pdd->qpd.queues_list, list) {
			if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
			    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
				uint32_t mqd_size, ctl_stack_size;

				*num_queues = *num_queues + 1;

				ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
				if (ret)
					return ret;

				extra_data_sizes += mqd_size + ctl_stack_size;
			} else {
				pr_err("Unsupported queue type (%d)\n", q->properties.type);
				return -EOPNOTSUPP;
			}
		}
	}
	*priv_data_sizes = extra_data_sizes +
				(*num_queues * sizeof(struct kfd_criu_queue_priv_data));

	return 0;
}

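/*
 * Snapshot a queue's MQD and control stack into caller-provided
 * buffers, provided the device queue manager supports checkpointing.
 */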
static int pqm_checkpoint_mqd(struct process_queue_manager *pqm,
			      unsigned int qid,
			      void *mqd,
			      void *ctl_stack)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.checkpoint_mqd) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	return pqn->q->device->dqm->ops.checkpoint_mqd(pqn->q->device->dqm,
						       pqn->q, mqd, ctl_stack);
}

static int criu_checkpoint_queue(struct kfd_process_device *pdd,
				 struct queue *q,
				 struct kfd_criu_queue_priv_data *q_data)
{
	uint8_t *mqd, *ctl_stack;
	int ret;

	mqd = (void *)(q_data + 1);
	ctl_stack = mqd + q_data->mqd_size;

	q_data->gpu_id = pdd->user_gpu_id;
	q_data->type = q->properties.type;
	q_data->format = q->properties.format;
	q_data->q_id = q->properties.queue_id;
	q_data->q_address = q->properties.queue_address;
	q_data->q_size = q->properties.queue_size;
	q_data->priority = q->properties.priority;
	q_data->q_percent = q->properties.queue_percent;
	q_data->read_ptr_addr = (uint64_t)q->properties.read_ptr;
	q_data->write_ptr_addr = (uint64_t)q->properties.write_ptr;
	q_data->doorbell_id = q->doorbell_id;

	q_data->sdma_id = q->sdma_id;

	q_data->eop_ring_buffer_address =
		q->properties.eop_ring_buffer_address;

	q_data->eop_ring_buffer_size = q->properties.eop_ring_buffer_size;

	q_data->ctx_save_restore_area_address =
		q->properties.ctx_save_restore_area_address;

	q_data->ctx_save_restore_area_size =
		q->properties.ctx_save_restore_area_size;

	q_data->gws = !!q->gws;

	ret = pqm_checkpoint_mqd(&pdd->process->pqm, q->properties.queue_id, mqd, ctl_stack);
	if (ret) {
		pr_err("Failed checkpoint queue_mqd (%d)\n", ret);
		return ret;
	}

	pr_debug("Dumping Queue: gpu_id:%x queue_id:%u\n", q_data->gpu_id, q_data->q_id);
	return ret;
}

static int criu_checkpoint_queues_device(struct kfd_process_device *pdd,
					 uint8_t __user *user_priv,
					 unsigned int *q_index,
					 uint64_t *queues_priv_data_offset)
{
	unsigned int q_private_data_size = 0;
	uint8_t *q_private_data = NULL; /* Local buffer to store individual queue private data */
	struct queue *q;
	int ret = 0;

	list_for_each_entry(q, &pdd->qpd.queues_list, list) {
		struct kfd_criu_queue_priv_data *q_data;
		uint64_t q_data_size;
		uint32_t mqd_size;
		uint32_t ctl_stack_size;

		if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE &&
		    q->properties.type != KFD_QUEUE_TYPE_SDMA &&
		    q->properties.type != KFD_QUEUE_TYPE_SDMA_XGMI) {

			pr_err("Unsupported queue type (%d)\n", q->properties.type);
			ret = -EOPNOTSUPP;
			break;
		}

		ret = get_queue_data_sizes(pdd, q, &mqd_size, &ctl_stack_size);
		if (ret)
			break;

		q_data_size = sizeof(*q_data) + mqd_size + ctl_stack_size;

		/* Increase local buffer space if needed */
		if (q_private_data_size < q_data_size) {
			kfree(q_private_data);

			q_private_data = kzalloc(q_data_size, GFP_KERNEL);
			if (!q_private_data) {
				ret = -ENOMEM;
				break;
			}
			q_private_data_size = q_data_size;
		}

		q_data = (struct kfd_criu_queue_priv_data *)q_private_data;

		/*
		 * data stored in this order:
		 * priv_data, mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
		 */
		q_data->mqd_size = mqd_size;
		q_data->ctl_stack_size = ctl_stack_size;

		ret = criu_checkpoint_queue(pdd, q, q_data);
		if (ret)
			break;

		q_data->object_type = KFD_CRIU_OBJECT_TYPE_QUEUE;

		ret = copy_to_user(user_priv + *queues_priv_data_offset,
				   q_data, q_data_size);
		if (ret) {
			ret = -EFAULT;
			break;
		}
		*queues_priv_data_offset += q_data_size;
		*q_index = *q_index + 1;
	}

	kfree(q_private_data);

	return ret;
}

int kfd_criu_checkpoint_queues(struct kfd_process *p,
			       uint8_t __user *user_priv_data,
			       uint64_t *priv_data_offset)
{
	int ret = 0, pdd_index, q_index = 0;

	for (pdd_index = 0; pdd_index < p->n_pdds; pdd_index++) {
		struct kfd_process_device *pdd = p->pdds[pdd_index];

		/*
		 * criu_checkpoint_queues_device will copy data to user and update q_index and
		 * queues_priv_data_offset
		 */
		ret = criu_checkpoint_queues_device(pdd, user_priv_data, &q_index,
						    priv_data_offset);

		if (ret)
			break;
	}

	return ret;
}

static void set_queue_properties_from_criu(struct queue_properties *qp,
					   struct kfd_criu_queue_priv_data *q_data,
					   uint32_t num_xcc)
{
	qp->is_interop = false;
	qp->queue_percent = q_data->q_percent;
	qp->priority = q_data->priority;
	qp->queue_address = q_data->q_address;
	qp->queue_size = q_data->q_size;
	qp->read_ptr = (uint32_t *) q_data->read_ptr_addr;
	qp->write_ptr = (uint32_t *) q_data->write_ptr_addr;
	qp->eop_ring_buffer_address = q_data->eop_ring_buffer_address;
	qp->eop_ring_buffer_size = q_data->eop_ring_buffer_size;
	qp->ctx_save_restore_area_address = q_data->ctx_save_restore_area_address;
	qp->ctx_save_restore_area_size = q_data->ctx_save_restore_area_size;
	if (q_data->type == KFD_QUEUE_TYPE_COMPUTE)
		qp->ctl_stack_size = q_data->ctl_stack_size / num_xcc;
	else
		qp->ctl_stack_size = q_data->ctl_stack_size;

	qp->type = q_data->type;
	qp->format = q_data->format;
}

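/*
 * Recreate one queue from CRIU private data: read the fixed-size
 * header and the variable-size MQD + control stack from user space,
 * rebuild the queue properties, and create the queue with the saved
 * state (including GWS, if it was attached at checkpoint time).
 */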
int kfd_criu_restore_queue(struct kfd_process *p,
			   uint8_t __user *user_priv_ptr,
			   uint64_t *priv_data_offset,
			   uint64_t max_priv_data_size)
{
	uint8_t *mqd, *ctl_stack, *q_extra_data = NULL;
	struct kfd_criu_queue_priv_data *q_data;
	struct kfd_process_device *pdd;
	uint64_t q_extra_data_size;
	struct queue_properties qp;
	unsigned int queue_id;
	int ret = 0;

	if (*priv_data_offset + sizeof(*q_data) > max_priv_data_size)
		return -EINVAL;

	q_data = kmalloc(sizeof(*q_data), GFP_KERNEL);
	if (!q_data)
		return -ENOMEM;

	ret = copy_from_user(q_data, user_priv_ptr + *priv_data_offset, sizeof(*q_data));
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += sizeof(*q_data);
	q_extra_data_size = (uint64_t)q_data->ctl_stack_size + q_data->mqd_size;

	if (*priv_data_offset + q_extra_data_size > max_priv_data_size) {
		ret = -EINVAL;
		goto exit;
	}

	q_extra_data = kmalloc(q_extra_data_size, GFP_KERNEL);
	if (!q_extra_data) {
		ret = -ENOMEM;
		goto exit;
	}

	ret = copy_from_user(q_extra_data, user_priv_ptr + *priv_data_offset, q_extra_data_size);
	if (ret) {
		ret = -EFAULT;
		goto exit;
	}

	*priv_data_offset += q_extra_data_size;

	pdd = kfd_process_device_data_by_id(p, q_data->gpu_id);
	if (!pdd) {
		pr_err("Failed to get pdd\n");
		ret = -EINVAL;
		goto exit;
	}

	/*
	 * data stored in this order:
	 * mqd[xcc0], mqd[xcc1],..., ctl_stack[xcc0], ctl_stack[xcc1]...
	 */
	mqd = q_extra_data;
	ctl_stack = mqd + q_data->mqd_size;

	memset(&qp, 0, sizeof(qp));
	set_queue_properties_from_criu(&qp, q_data, NUM_XCC(pdd->dev->adev->gfx.xcc_mask));

	print_queue_properties(&qp);

	ret = pqm_create_queue(&p->pqm, pdd->dev, &qp, &queue_id, q_data, mqd, ctl_stack, NULL);
	if (ret) {
		pr_err("Failed to create new queue err:%d\n", ret);
		goto exit;
	}

	if (q_data->gws)
		ret = pqm_set_gws(&p->pqm, q_data->q_id, pdd->dev->gws);

exit:
	if (ret)
		pr_err("Failed to restore queue (%d)\n", ret);
	else
		pr_debug("Queue id %d was restored successfully\n", queue_id);

	kfree(q_data);
	kfree(q_extra_data);

	return ret;
}

int pqm_get_queue_checkpoint_info(struct process_queue_manager *pqm,
				  unsigned int qid,
				  uint32_t *mqd_size,
				  uint32_t *ctl_stack_size)
{
	struct process_queue_node *pqn;

	pqn = get_queue_by_qid(pqm, qid);
	if (!pqn) {
		pr_debug("amdkfd: No queue %d exists for operation\n", qid);
		return -EFAULT;
	}

	if (!pqn->q->device->dqm->ops.get_queue_checkpoint_info) {
		pr_err("amdkfd: queue dumping not supported on this device\n");
		return -EOPNOTSUPP;
	}

	pqn->q->device->dqm->ops.get_queue_checkpoint_info(pqn->q->device->dqm,
							   pqn->q, mqd_size,
							   ctl_stack_size);
	return 0;
}

#if defined(CONFIG_DEBUG_FS)

int pqm_debugfs_mqds(struct seq_file *m, void *data)
{
	struct process_queue_manager *pqm = data;
	struct process_queue_node *pqn;
	struct queue *q;
	enum KFD_MQD_TYPE mqd_type;
	struct mqd_manager *mqd_mgr;
	int r = 0, xcc, num_xccs = 1;
	void *mqd;
	uint64_t size = 0;

	list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
		if (pqn->q) {
			q = pqn->q;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_SDMA:
			case KFD_QUEUE_TYPE_SDMA_XGMI:
				seq_printf(m, "  SDMA queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_SDMA;
				break;
			case KFD_QUEUE_TYPE_COMPUTE:
				seq_printf(m, "  Compute queue on device %x\n",
					   q->device->id);
				mqd_type = KFD_MQD_TYPE_CP;
				num_xccs = NUM_XCC(q->device->xcc_mask);
				break;
			default:
				seq_printf(m,
				"  Bad user queue type %d on device %x\n",
					   q->properties.type, q->device->id);
				continue;
			}
			mqd_mgr = q->device->dqm->mqd_mgrs[mqd_type];
			size = mqd_mgr->mqd_stride(mqd_mgr,
						   &q->properties);
		} else if (pqn->kq) {
			q = pqn->kq->queue;
			mqd_mgr = pqn->kq->mqd_mgr;
			switch (q->properties.type) {
			case KFD_QUEUE_TYPE_DIQ:
				seq_printf(m, "  DIQ on device %x\n",
					   pqn->kq->dev->id);
				break;
			default:
				seq_printf(m,
				"  Bad kernel queue type %d on device %x\n",
					   q->properties.type,
					   pqn->kq->dev->id);
				continue;
			}
		} else {
			seq_printf(m,
				"  Weird: Queue node with neither kernel nor user queue\n");
			continue;
		}

		for (xcc = 0; xcc < num_xccs; xcc++) {
			mqd = q->mqd + size * xcc;
			r = mqd_mgr->debugfs_show_mqd(m, mqd);
			if (r != 0)
				break;
		}
	}

	return r;
}

#endif