/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 */
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>

#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
#include "amdgpu_dev_coredump.h"
#include "amdgpu_xgmi.h"

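/**
 * amdgpu_job_do_core_dump - dump IP state and core dump one device
 * @adev: amdgpu device to dump
 * @job: job that triggered the dump
 *
 * Walks all IP blocks of @adev, snapshots the state of those that
 * implement dump_ip_state() and then generates a device core dump
 * for the job.
 */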
static void amdgpu_job_do_core_dump(struct amdgpu_device *adev,
				    struct amdgpu_job *job)
{
	int i;

	dev_info(adev->dev, "Dumping IP State\n");
	for (i = 0; i < adev->num_ip_blocks; i++)
		if (adev->ip_blocks[i].version->funcs->dump_ip_state)
			adev->ip_blocks[i].version->funcs
				->dump_ip_state((void *)&adev->ip_blocks[i]);
	dev_info(adev->dev, "Dumping IP State Completed\n");

	amdgpu_coredump(adev, true, false, job);
}

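/**
 * amdgpu_job_core_dump - core dump all devices affected by a job timeout
 * @adev: amdgpu device the job was submitted to
 * @job: job that timed out
 *
 * On XGMI configurations every device in the hive is dumped, with @adev
 * rotated to the front of the list so it is dumped first; otherwise only
 * @adev itself is dumped.
 */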
static void amdgpu_job_core_dump(struct amdgpu_device *adev,
				 struct amdgpu_job *job)
{
	struct list_head device_list, *device_list_handle = NULL;
	struct amdgpu_device *tmp_adev = NULL;
	struct amdgpu_hive_info *hive = NULL;

	if (!amdgpu_sriov_vf(adev))
		hive = amdgpu_get_xgmi_hive(adev);
	if (hive)
		mutex_lock(&hive->hive_lock);
	/*
	 * Reuse the logic in amdgpu_device_gpu_recover() to build the list of
	 * devices for the core dump.
	 */
	INIT_LIST_HEAD(&device_list);
	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) {
		list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head)
			list_add_tail(&tmp_adev->reset_list, &device_list);
		if (!list_is_first(&adev->reset_list, &device_list))
			list_rotate_to_front(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	} else {
		list_add_tail(&adev->reset_list, &device_list);
		device_list_handle = &device_list;
	}

	/* Do the coredump for each device */
	list_for_each_entry(tmp_adev, device_list_handle, reset_list)
		amdgpu_job_do_core_dump(tmp_adev, job);

	if (hive) {
		mutex_unlock(&hive->hive_lock);
		amdgpu_put_xgmi_hive(hive);
	}
}

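/**
 * amdgpu_job_timedout - scheduler callback for job timeouts
 * @s_job: job that hit its timeout
 *
 * Core dumps the device state, then escalates through the available
 * recovery mechanisms: soft recovery, per-ring reset and finally a full
 * GPU reset.
 *
 * Returns: DRM_GPU_SCHED_STAT_ENODEV if the device is gone,
 * DRM_GPU_SCHED_STAT_RESET otherwise.
 */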
static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
	struct amdgpu_job *job = to_amdgpu_job(s_job);
	struct drm_wedge_task_info *info = NULL;
	struct amdgpu_task_info *ti = NULL;
	struct amdgpu_device *adev = ring->adev;
	int idx, r;

	if (!drm_dev_enter(adev_to_drm(adev), &idx)) {
		dev_info(adev->dev, "%s - device unplugged, skipping recovery on scheduler:%s",
			 __func__, s_job->sched->name);

		/* Effectively the job is aborted as the device is gone */
		return DRM_GPU_SCHED_STAT_ENODEV;
	}

	/*
	 * Do the coredump immediately after a job timeout to get a very
	 * close dump/snapshot/representation of the GPU's current error status.
	 * Skip it for SRIOV, since VF FLR will be triggered by the host driver
	 * before the job timeout.
	 */
	if (!amdgpu_sriov_vf(adev))
		amdgpu_job_core_dump(adev, job);

	if (amdgpu_gpu_recovery &&
	    amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_SOFT_RESET) &&
	    amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
		dev_err(adev->dev, "ring %s timeout, but soft recovered\n",
			s_job->sched->name);
		goto exit;
	}

	dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n",
		job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
		ring->fence_drv.sync_seq);

	ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid);
	if (ti) {
		amdgpu_vm_print_task_info(adev, ti);
		info = &ti->task;
	}

	/* attempt a per-ring reset */
	if (amdgpu_gpu_recovery &&
	    amdgpu_ring_is_reset_type_supported(ring, AMDGPU_RESET_TYPE_PER_QUEUE) &&
	    ring->funcs->reset) {
		dev_err(adev->dev, "Starting %s ring reset\n",
			s_job->sched->name);
		r = amdgpu_ring_reset(ring, job->vmid, job->hw_fence);
		if (!r) {
			atomic_inc(&ring->adev->gpu_reset_counter);
			dev_err(adev->dev, "Ring %s reset succeeded\n",
				ring->sched.name);
			drm_dev_wedged_event(adev_to_drm(adev),
					     DRM_WEDGE_RECOVERY_NONE, info);
			goto exit;
		}
		dev_err(adev->dev, "Ring %s reset failed\n", ring->sched.name);
	}

	dma_fence_set_error(&s_job->s_fence->finished, -ETIME);

	if (amdgpu_device_should_recover_gpu(ring->adev)) {
		struct amdgpu_reset_context reset_context;

		memset(&reset_context, 0, sizeof(reset_context));

		reset_context.method = AMD_RESET_METHOD_NONE;
		reset_context.reset_req_dev = adev;
		reset_context.src = AMDGPU_RESET_SRC_JOB;
		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);

		/*
		 * Avoid an unnecessary extra coredump, as we have already
		 * got a very close representation of the GPU's error status.
		 */
		set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags);

		r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
		if (r)
			dev_err(adev->dev, "GPU Recovery Failed: %d\n", r);
	} else {
		drm_sched_suspend_timeout(&ring->sched);
		if (amdgpu_sriov_vf(adev))
			adev->virt.tdr_debug = true;
	}

exit:
	amdgpu_vm_put_task_info(ti);
	drm_dev_exit(idx);
	return DRM_GPU_SCHED_STAT_RESET;
}

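/**
 * amdgpu_job_alloc - allocate a job with space for IBs
 * @adev: amdgpu device the job belongs to
 * @vm: optional VM the job will run under
 * @entity: optional scheduler entity the job will be pushed to
 * @owner: owner used for scheduler job initialization
 * @num_ibs: number of IBs to reserve, must be non-zero
 * @job: resulting job, set to NULL on failure
 * @drm_client_id: DRM client id passed through to the scheduler
 *
 * Allocates the job together with its hardware fences and explicit sync
 * object. If @entity is given, the underlying drm_sched_job is initialized
 * as well.
 *
 * Returns: 0 on success, negative error code otherwise.
 */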
int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
		     struct drm_sched_entity *entity, void *owner,
		     unsigned int num_ibs, struct amdgpu_job **job,
		     u64 drm_client_id)
{
	struct amdgpu_fence *af;
	int r;

	if (num_ibs == 0)
		return -EINVAL;

	*job = kzalloc(struct_size(*job, ibs, num_ibs), GFP_KERNEL);
	if (!*job)
		return -ENOMEM;

	af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
	if (!af) {
		r = -ENOMEM;
		goto err_job;
	}
	(*job)->hw_fence = af;

	af = kzalloc(sizeof(struct amdgpu_fence), GFP_KERNEL);
	if (!af) {
		r = -ENOMEM;
		goto err_fence;
	}
	(*job)->hw_vm_fence = af;

	(*job)->vm = vm;

	amdgpu_sync_create(&(*job)->explicit_sync);
	(*job)->generation = amdgpu_vm_generation(adev, vm);
	(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;

	if (!entity)
		return 0;

	r = drm_sched_job_init(&(*job)->base, entity, 1, owner, drm_client_id);
	if (!r)
		return 0;

	kfree((*job)->hw_vm_fence);

err_fence:
	kfree((*job)->hw_fence);
err_job:
	kfree(*job);
	*job = NULL;

	return r;
}

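/**
 * amdgpu_job_alloc_with_ib - allocate a job with a single IB
 * @adev: amdgpu device the job belongs to
 * @entity: optional scheduler entity the job will be pushed to
 * @owner: owner used for scheduler job initialization
 * @size: size of the IB in bytes
 * @pool_type: IB pool to allocate the IB from
 * @job: resulting job, set to NULL on failure
 * @k_job_id: kernel job id, forwarded as the drm_client_id
 *
 * Convenience wrapper around amdgpu_job_alloc() which also allocates the
 * job's single IB.
 *
 * Returns: 0 on success, negative error code otherwise.
 */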
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
			     struct drm_sched_entity *entity, void *owner,
			     size_t size, enum amdgpu_ib_pool_type pool_type,
			     struct amdgpu_job **job, u64 k_job_id)
{
	int r;

	r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job, k_job_id);
	if (r)
		return r;

	(*job)->num_ibs = 1;
	r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
	if (r) {
		if (entity)
			drm_sched_job_cleanup(&(*job)->base);
		kfree((*job)->hw_vm_fence);
		kfree((*job)->hw_fence);
		kfree(*job);
		*job = NULL;
	}

	return r;
}

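/**
 * amdgpu_job_set_resources - record GDS/GWS/OA allocations in the job
 * @job: job to update
 * @gds: optional GDS buffer object
 * @gws: optional GWS buffer object
 * @oa: optional OA buffer object
 *
 * Stores the base offset and size (in pages) of each given resource in
 * the job.
 */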
void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
			      struct amdgpu_bo *gws, struct amdgpu_bo *oa)
{
	if (gds) {
		job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
		job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT;
	}
	if (gws) {
		job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT;
		job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT;
	}
	if (oa) {
		job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT;
		job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT;
	}
}

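/**
 * amdgpu_job_free_resources - free the IBs of a job
 * @job: job to clean up
 *
 * Frees all IBs, keeping them alive until the first fence that was
 * actually initialized (the finished fence if available, otherwise the
 * hardware fence) has signaled.
 */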
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
	struct dma_fence *f;
	unsigned int i;

	/* Check if any fences were initialized */
	if (job->base.s_fence && job->base.s_fence->finished.ops)
		f = &job->base.s_fence->finished;
	else if (job->hw_fence && job->hw_fence->base.ops)
		f = &job->hw_fence->base;
	else
		f = NULL;

	for (i = 0; i < job->num_ibs; ++i)
		amdgpu_ib_free(&job->ibs[i], f);
}

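/**
 * amdgpu_job_free_cb - scheduler callback to free a job
 * @s_job: scheduler job to free
 *
 * Drops the references on the hardware fences if they were ever
 * initialized, otherwise frees the raw allocations directly.
 */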
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
{
	struct amdgpu_job *job = to_amdgpu_job(s_job);

	drm_sched_job_cleanup(s_job);

	amdgpu_sync_free(&job->explicit_sync);

	if (job->hw_fence->base.ops)
		dma_fence_put(&job->hw_fence->base);
	else
		kfree(job->hw_fence);
	if (job->hw_vm_fence->base.ops)
		dma_fence_put(&job->hw_vm_fence->base);
	else
		kfree(job->hw_vm_fence);

	kfree(job);
}

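/**
 * amdgpu_job_set_gang_leader - set the gang leader for the job
 * @job: job to set the gang leader for
 * @leader: leader to set
 *
 * Specify the scheduled fence of the gang leader for this job.
 */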
void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
				struct amdgpu_job *leader)
{
	struct dma_fence *fence = &leader->base.s_fence->scheduled;

	WARN_ON(job->gang_submit);

	/*
	 * Don't add a reference when we are the gang leader to avoid a
	 * circular dependency.
	 */
	if (job != leader)
		dma_fence_get(fence);
	job->gang_submit = fence;
}

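/**
 * amdgpu_job_free - free a job that was not pushed to the hardware
 * @job: job to free
 *
 * Cleans up the scheduler job if it was initialized and releases the
 * job's resources, sync object and fences.
 */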
void amdgpu_job_free(struct amdgpu_job *job)
{
	if (job->base.entity)
		drm_sched_job_cleanup(&job->base);

	amdgpu_job_free_resources(job);
	amdgpu_sync_free(&job->explicit_sync);
	if (job->gang_submit != &job->base.s_fence->scheduled)
		dma_fence_put(job->gang_submit);

	if (job->hw_fence->base.ops)
		dma_fence_put(&job->hw_fence->base);
	else
		kfree(job->hw_fence);
	if (job->hw_vm_fence->base.ops)
		dma_fence_put(&job->hw_vm_fence->base);
	else
		kfree(job->hw_vm_fence);

	kfree(job);
}

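/**
 * amdgpu_job_submit - push the job to the scheduler and return its fence
 * @job: job to submit
 *
 * Arms the scheduler job, takes a reference to the finished fence for the
 * caller and pushes the job to the entity it was allocated for. Must only
 * be used for jobs allocated with a valid entity.
 *
 * Illustrative sketch of the allocate/fill/submit flow (the entity, owner
 * and k_job_id setup is assumed to be done by the caller):
 *
 *	r = amdgpu_job_alloc_with_ib(adev, entity, owner, 64,
 *				     AMDGPU_IB_POOL_DIRECT, &job, k_job_id);
 *	if (r)
 *		return r;
 *	... fill job->ibs[0] with packets ...
 *	fence = amdgpu_job_submit(job);
 *	dma_fence_wait(fence, false);
 *	dma_fence_put(fence);
 *
 * Returns: a reference to the job's finished fence, which the caller must
 * release with dma_fence_put().
 */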
struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
{
	struct dma_fence *f;

	drm_sched_job_arm(&job->base);
	f = dma_fence_get(&job->base.s_fence->finished);
	amdgpu_job_free_resources(job);
	drm_sched_entity_push_job(&job->base);

	return f;
}

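/**
 * amdgpu_job_submit_direct - schedule the job's IBs on the ring directly
 * @job: job to submit
 * @ring: ring to run the IBs on
 * @fence: resulting hardware fence
 *
 * Bypasses the scheduler. On success the job is freed immediately since
 * its IBs have already been handed to the hardware.
 *
 * Returns: 0 on success, negative error code otherwise.
 */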
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
			     struct dma_fence **fence)
{
	int r;

	job->base.sched = &ring->sched;
	r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
	if (r)
		return r;

	amdgpu_job_free(job);
	return 0;
}

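/**
 * amdgpu_job_prepare_job - scheduler callback returning the next dependency
 * @sched_job: job to check dependencies for
 * @s_entity: entity the job is queued on
 *
 * Returns the next fence the job has to wait for before it can run: the
 * gang switch fence, the isolation enforcement fence or the fence of a
 * freshly grabbed VMID, in that order. Returns NULL once the job is ready.
 */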
static struct dma_fence *
amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
		       struct drm_sched_entity *s_entity)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
	struct amdgpu_job *job = to_amdgpu_job(sched_job);
	struct dma_fence *fence;
	int r;

	r = drm_sched_entity_error(s_entity);
	if (r)
		goto error;

	if (job->gang_submit) {
		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
		if (fence)
			return fence;
	}

	fence = amdgpu_device_enforce_isolation(ring->adev, ring, job);
	if (fence)
		return fence;

	if (job->vm && !job->vmid) {
		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
		if (r) {
			dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r);
			goto error;
		}
		return fence;
	}

	return NULL;

error:
	dma_fence_set_error(&job->base.s_fence->finished, r);
	return NULL;
}

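/**
 * amdgpu_job_run - scheduler callback to execute a job on its ring
 * @sched_job: job to run
 *
 * Cancels the job if VRAM was lost since it was allocated or if a gang
 * would be resubmitted, otherwise schedules its IBs on the ring.
 *
 * Returns: the hardware fence of the submission, or an ERR_PTR on failure.
 */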
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
	struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *fence = NULL, *finished;
	struct amdgpu_job *job;
	int r = 0;

	job = to_amdgpu_job(sched_job);
	finished = &job->base.s_fence->finished;

	trace_amdgpu_sched_run_job(job);

	/* Skip job if VRAM is lost and never resubmit gangs */
	if (job->generation != amdgpu_vm_generation(adev, job->vm) ||
	    (job->job_run_counter && job->gang_submit))
		dma_fence_set_error(finished, -ECANCELED);

	if (finished->error < 0) {
		dev_dbg(adev->dev, "Skip scheduling IBs in ring(%s)",
			ring->name);
	} else {
		r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
				       &fence);
		if (r)
			dev_err(adev->dev,
				"Error scheduling IBs (%d) in ring(%s)", r,
				ring->name);
	}

	job->job_run_counter++;
	amdgpu_job_free_resources(job);

	fence = r ? ERR_PTR(r) : fence;
	return fence;
}

/*
 * This is a duplicate of the function from the DRM scheduler's
 * sched_internal.h. The plan is to remove it when
 * amdgpu_job_stop_all_jobs_on_sched() is removed, the latter being
 * incorrect and racy.
 *
 * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/
 */
static struct drm_sched_job *
drm_sched_entity_queue_pop(struct drm_sched_entity *entity)
{
	struct spsc_node *node;

	node = spsc_queue_pop(&entity->job_queue);
	if (!node)
		return NULL;

	return container_of(node, struct drm_sched_job, queue_node);
}

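/**
 * amdgpu_job_stop_all_jobs_on_sched - signal and poison all jobs on a scheduler
 * @sched: scheduler to drain
 *
 * Pops every job that has not been scheduled yet and signals its fences
 * with -EHWPOISON, then does the same for jobs already pushed to the
 * hardware.
 */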
void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job;
	struct drm_sched_entity *s_entity = NULL;
	int i;

	/* Signal all jobs not yet scheduled */
	for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) {
		struct drm_sched_rq *rq = sched->sched_rq[i];

		spin_lock(&rq->lock);
		list_for_each_entry(s_entity, &rq->entities, list) {
			while ((s_job = drm_sched_entity_queue_pop(s_entity))) {
				struct drm_sched_fence *s_fence = s_job->s_fence;

				dma_fence_signal(&s_fence->scheduled);
				dma_fence_set_error(&s_fence->finished, -EHWPOISON);
				dma_fence_signal(&s_fence->finished);
			}
		}
		spin_unlock(&rq->lock);
	}

	/* Signal all jobs already scheduled to HW */
	list_for_each_entry(s_job, &sched->pending_list, list) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		dma_fence_set_error(&s_fence->finished, -EHWPOISON);
		dma_fence_signal(&s_fence->finished);
	}
}

const struct drm_sched_backend_ops amdgpu_sched_ops = {
	.prepare_job = amdgpu_job_prepare_job,
	.run_job = amdgpu_job_run,
	.timedout_job = amdgpu_job_timedout,
	.free_job = amdgpu_job_free_cb
};