| 1 | /* |
| 2 | * Copyright 2015 Advanced Micro Devices, Inc. |
| 3 | * |
| 4 | * Permission is hereby granted, free of charge, to any person obtaining a |
| 5 | * copy of this software and associated documentation files (the "Software"), |
| 6 | * to deal in the Software without restriction, including without limitation |
| 7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| 8 | * and/or sell copies of the Software, and to permit persons to whom the |
| 9 | * Software is furnished to do so, subject to the following conditions: |
| 10 | * |
| 11 | * The above copyright notice and this permission notice shall be included in |
| 12 | * all copies or substantial portions of the Software. |
| 13 | * |
| 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| 17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
| 18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
| 19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
| 20 | * OTHER DEALINGS IN THE SOFTWARE. |
| 21 | * |
| 22 | */ |
| 23 | |
| 24 | /** |
| 25 | * DOC: Overview |
| 26 | * |
| 27 | * The GPU scheduler provides entities which allow userspace to push jobs |
| 28 | * into software queues which are then scheduled on a hardware run queue. |
| 29 | * The software queues have a priority among them. The scheduler selects the entities |
| 30 | * from the run queue using a FIFO. The scheduler provides dependency handling |
| 31 | * features among jobs. The driver is supposed to provide callback functions for |
| 32 | * backend operations to the scheduler, like submitting a job to the hardware run |
| 33 | * queue, returning the dependencies of a job, etc. |
| 34 | * |
| 35 | * The organisation of the scheduler is the following: |
| 36 | * |
| 37 | * 1. Each hw run queue has one scheduler |
| 38 | * 2. Each scheduler has multiple run queues with different priorities |
| 39 | * (e.g., HIGH_HW, HIGH_SW, KERNEL, NORMAL) |
| 40 | * 3. Each scheduler run queue has a queue of entities to schedule |
| 41 | * 4. Entities themselves maintain a queue of jobs that will be scheduled on |
| 42 | * the hardware. |
| 43 | * |
| 44 | * The jobs in an entity are always scheduled in the order in which they were pushed. |
| 45 | * |
| 46 | * Note that once a job has been taken from the entity's queue and pushed to the |
| 47 | * hardware, i.e. the pending queue, the entity must not be referenced anymore |
| 48 | * through the job's entity pointer. |
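|  | * |
|  | * As a rough, driver-agnostic sketch of how these pieces are wired together |
|  | * (the ring, ctx, job, args, sched_list and client_id names below are |
|  | * hypothetical, not taken from any particular driver):: |
|  | * |
|  | *     // One scheduler per hardware run queue, one entity per software queue. |
|  | *     drm_sched_init(&ring->sched, &args); |
|  | *     drm_sched_entity_init(&ctx->entity, DRM_SCHED_PRIORITY_NORMAL, |
|  | *                           sched_list, 1, NULL); |
|  | * |
|  | *     // Per job: initialize against the entity, then arm and push it. |
|  | *     drm_sched_job_init(&job->base, &ctx->entity, 1, ctx, client_id); |
|  | *     drm_sched_job_arm(&job->base); |
|  | *     drm_sched_entity_push_job(&job->base); |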
| 49 | */ |
| 50 | |
| 51 | /** |
| 52 | * DOC: Flow Control |
| 53 | * |
| 54 | * The DRM GPU scheduler provides a flow control mechanism to regulate the rate |
| 55 | * at which the jobs fetched from scheduler entities are executed. |
| 56 | * |
| 57 | * In this context the &drm_gpu_scheduler keeps track of a driver specified |
| 58 | * credit limit representing the capacity of this scheduler and a credit count; |
| 59 | * every &drm_sched_job carries a driver specified number of credits. |
| 60 | * |
| 61 | * Once a job is executed (but not yet finished), the job's credits contribute |
| 62 | * to the scheduler's credit count until the job is finished. If by executing |
| 63 | * one more job the scheduler's credit count would exceed the scheduler's |
| 64 | * credit limit, the job won't be executed. Instead, the scheduler will wait |
| 65 | * until the credit count has decreased enough to not overflow its credit limit. |
| 66 | * This implies waiting for previously executed jobs. |
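|  | * |
|  | * As a hedged illustration (the my_ops, ring, ctx and job names and the |
|  | * dword-based sizing below are made up for this example), a driver could |
|  | * express the scheduler capacity and per-job cost in matching units:: |
|  | * |
|  | *     struct drm_sched_init_args args = { |
|  | *         .ops = &my_ops, |
|  | *         .credit_limit = ring->size_dw,    // capacity of this scheduler |
|  | *         // ... remaining fields ... |
|  | *     }; |
|  | *     drm_sched_init(&ring->sched, &args); |
|  | * |
|  | *     // Each job declares how much of that capacity it will consume. |
|  | *     drm_sched_job_init(&job->base, &ctx->entity, job->num_dw, ctx, client_id); |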
| 67 | */ |
| 68 | |
| 69 | #include <linux/export.h> |
| 70 | #include <linux/wait.h> |
| 71 | #include <linux/sched.h> |
| 72 | #include <linux/completion.h> |
| 73 | #include <linux/dma-resv.h> |
| 74 | #include <uapi/linux/sched/types.h> |
| 75 | |
| 76 | #include <drm/drm_print.h> |
| 77 | #include <drm/drm_gem.h> |
| 78 | #include <drm/drm_syncobj.h> |
| 79 | #include <drm/gpu_scheduler.h> |
| 80 | #include <drm/spsc_queue.h> |
| 81 | |
| 82 | #include "sched_internal.h" |
| 83 | |
| 84 | #define CREATE_TRACE_POINTS |
| 85 | #include "gpu_scheduler_trace.h" |
| 86 | |
| 87 | int drm_sched_policy = DRM_SCHED_POLICY_FIFO; |
| 88 | |
| 89 | /** |
| 90 | * DOC: sched_policy (int) |
| 91 | * Used to override the default scheduling policy for entities in a run queue. |
| 92 | */ |
| 93 | MODULE_PARM_DESC(sched_policy, "Specify the scheduling policy for entities on a run-queue, " __stringify(DRM_SCHED_POLICY_RR) " = Round Robin, " __stringify(DRM_SCHED_POLICY_FIFO) " = FIFO (default)."); |
| 94 | module_param_named(sched_policy, drm_sched_policy, int, 0444); |
| 95 | |
| 96 | static u32 drm_sched_available_credits(struct drm_gpu_scheduler *sched) |
| 97 | { |
| 98 | u32 credits; |
| 99 | |
| 100 | WARN_ON(check_sub_overflow(sched->credit_limit, |
| 101 | atomic_read(&sched->credit_count), |
| 102 | &credits)); |
| 103 | |
| 104 | return credits; |
| 105 | } |
| 106 | |
| 107 | /** |
| 108 | * drm_sched_can_queue -- Can we queue more to the hardware? |
| 109 | * @sched: scheduler instance |
| 110 | * @entity: the scheduler entity |
| 111 | * |
| 112 | * Return true if we can push at least one more job from @entity, false |
| 113 | * otherwise. |
| 114 | */ |
| 115 | static bool drm_sched_can_queue(struct drm_gpu_scheduler *sched, |
| 116 | struct drm_sched_entity *entity) |
| 117 | { |
| 118 | struct drm_sched_job *s_job; |
| 119 | |
| 120 | s_job = drm_sched_entity_queue_peek(entity); |
| 121 | if (!s_job) |
| 122 | return false; |
| 123 | |
| 124 | /* If a job exceeds the credit limit, truncate it to the credit limit |
| 125 | * itself to guarantee forward progress. |
| 126 | */ |
| 127 | if (s_job->credits > sched->credit_limit) { |
| 128 | dev_WARN(sched->dev, |
| 129 | "Jobs may not exceed the credit limit, truncate.\n" ); |
| 130 | s_job->credits = sched->credit_limit; |
| 131 | } |
| 132 | |
| 133 | return drm_sched_available_credits(sched) >= s_job->credits; |
| 134 | } |
| 135 | |
| 136 | static __always_inline bool drm_sched_entity_compare_before(struct rb_node *a, |
| 137 | const struct rb_node *b) |
| 138 | { |
| 139 | struct drm_sched_entity *ent_a = rb_entry((a), struct drm_sched_entity, rb_tree_node); |
| 140 | struct drm_sched_entity *ent_b = rb_entry((b), struct drm_sched_entity, rb_tree_node); |
| 141 | |
| 142 | return ktime_before(ent_a->oldest_job_waiting, ent_b->oldest_job_waiting); |
| 143 | } |
| 144 | |
| 145 | static void drm_sched_rq_remove_fifo_locked(struct drm_sched_entity *entity, |
| 146 | struct drm_sched_rq *rq) |
| 147 | { |
| 148 | if (!RB_EMPTY_NODE(&entity->rb_tree_node)) { |
| 149 | rb_erase_cached(&entity->rb_tree_node, &rq->rb_tree_root); |
| 150 | RB_CLEAR_NODE(&entity->rb_tree_node); |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | void drm_sched_rq_update_fifo_locked(struct drm_sched_entity *entity, |
| 155 | struct drm_sched_rq *rq, |
| 156 | ktime_t ts) |
| 157 | { |
| 158 | /* |
| 159 | * Both locks need to be grabbed, one to protect from entity->rq change |
| 160 | * for entity from within concurrent drm_sched_entity_select_rq and the |
| 161 | * other to update the rb tree structure. |
| 162 | */ |
| 163 | lockdep_assert_held(&entity->lock); |
| 164 | lockdep_assert_held(&rq->lock); |
| 165 | |
| 166 | drm_sched_rq_remove_fifo_locked(entity, rq); |
| 167 | |
| 168 | entity->oldest_job_waiting = ts; |
| 169 | |
| 170 | rb_add_cached(&entity->rb_tree_node, &rq->rb_tree_root, |
| 171 | drm_sched_entity_compare_before); |
| 172 | } |
| 173 | |
| 174 | /** |
| 175 | * drm_sched_rq_init - initialize a given run queue struct |
| 176 | * |
| 177 | * @sched: scheduler instance to associate with this run queue |
| 178 | * @rq: scheduler run queue |
| 179 | * |
| 180 | * Initializes a scheduler runqueue. |
| 181 | */ |
| 182 | static void drm_sched_rq_init(struct drm_gpu_scheduler *sched, |
| 183 | struct drm_sched_rq *rq) |
| 184 | { |
| 185 | spin_lock_init(&rq->lock); |
| 186 | INIT_LIST_HEAD(&rq->entities); |
| 187 | rq->rb_tree_root = RB_ROOT_CACHED; |
| 188 | rq->current_entity = NULL; |
| 189 | rq->sched = sched; |
| 190 | } |
| 191 | |
| 192 | /** |
| 193 | * drm_sched_rq_add_entity - add an entity |
| 194 | * |
| 195 | * @rq: scheduler run queue |
| 196 | * @entity: scheduler entity |
| 197 | * |
| 198 | * Adds a scheduler entity to the run queue. |
| 199 | */ |
| 200 | void drm_sched_rq_add_entity(struct drm_sched_rq *rq, |
| 201 | struct drm_sched_entity *entity) |
| 202 | { |
| 203 | lockdep_assert_held(&entity->lock); |
| 204 | lockdep_assert_held(&rq->lock); |
| 205 | |
| 206 | if (!list_empty(&entity->list)) |
| 207 | return; |
| 208 | |
| 209 | atomic_inc(rq->sched->score); |
| 210 | list_add_tail(&entity->list, &rq->entities); |
| 211 | } |
| 212 | |
| 213 | /** |
| 214 | * drm_sched_rq_remove_entity - remove an entity |
| 215 | * |
| 216 | * @rq: scheduler run queue |
| 217 | * @entity: scheduler entity |
| 218 | * |
| 219 | * Removes a scheduler entity from the run queue. |
| 220 | */ |
| 221 | void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, |
| 222 | struct drm_sched_entity *entity) |
| 223 | { |
| 224 | lockdep_assert_held(&entity->lock); |
| 225 | |
| 226 | if (list_empty(&entity->list)) |
| 227 | return; |
| 228 | |
| 229 | spin_lock(&rq->lock); |
| 230 | |
| 231 | atomic_dec(rq->sched->score); |
| 232 | list_del_init(&entity->list); |
| 233 | |
| 234 | if (rq->current_entity == entity) |
| 235 | rq->current_entity = NULL; |
| 236 | |
| 237 | if (drm_sched_policy == DRM_SCHED_POLICY_FIFO) |
| 238 | drm_sched_rq_remove_fifo_locked(entity, rq); |
| 239 | |
| 240 | spin_unlock(&rq->lock); |
| 241 | } |
| 242 | |
| 243 | /** |
| 244 | * drm_sched_rq_select_entity_rr - Select an entity which could provide a job to run |
| 245 | * |
| 246 | * @sched: the gpu scheduler |
| 247 | * @rq: scheduler run queue to check. |
| 248 | * |
| 249 | * Try to find the next ready entity. |
| 250 | * |
| 251 | * Return an entity if one is found; return an error-pointer (!NULL) if an |
| 252 | * entity was ready, but the scheduler had insufficient credits to accommodate |
| 253 | * its job; return NULL if no ready entity was found. |
| 254 | */ |
| 255 | static struct drm_sched_entity * |
| 256 | drm_sched_rq_select_entity_rr(struct drm_gpu_scheduler *sched, |
| 257 | struct drm_sched_rq *rq) |
| 258 | { |
| 259 | struct drm_sched_entity *entity; |
| 260 | |
| 261 | spin_lock(&rq->lock); |
| 262 | |
| 263 | entity = rq->current_entity; |
| 264 | if (entity) { |
| 265 | list_for_each_entry_continue(entity, &rq->entities, list) { |
| 266 | if (drm_sched_entity_is_ready(entity)) |
| 267 | goto found; |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | list_for_each_entry(entity, &rq->entities, list) { |
| 272 | if (drm_sched_entity_is_ready(entity)) |
| 273 | goto found; |
| 274 | |
| 275 | if (entity == rq->current_entity) |
| 276 | break; |
| 277 | } |
| 278 | |
| 279 | spin_unlock(&rq->lock); |
| 280 | |
| 281 | return NULL; |
| 282 | |
| 283 | found: |
| 284 | if (!drm_sched_can_queue(sched, entity)) { |
| 285 | /* |
| 286 | * If scheduler cannot take more jobs signal the caller to not |
| 287 | * consider lower priority queues. |
| 288 | */ |
| 289 | entity = ERR_PTR(-ENOSPC); |
| 290 | } else { |
| 291 | rq->current_entity = entity; |
| 292 | reinit_completion(&entity->entity_idle); |
| 293 | } |
| 294 | |
| 295 | spin_unlock(&rq->lock); |
| 296 | |
| 297 | return entity; |
| 298 | } |
| 299 | |
| 300 | /** |
| 301 | * drm_sched_rq_select_entity_fifo - Select an entity which provides a job to run |
| 302 | * |
| 303 | * @sched: the gpu scheduler |
| 304 | * @rq: scheduler run queue to check. |
| 305 | * |
| 306 | * Find oldest waiting ready entity. |
| 307 | * |
| 308 | * Return an entity if one is found; return an error-pointer (!NULL) if an |
| 309 | * entity was ready, but the scheduler had insufficient credits to accommodate |
| 310 | * its job; return NULL if no ready entity was found. |
| 311 | */ |
| 312 | static struct drm_sched_entity * |
| 313 | drm_sched_rq_select_entity_fifo(struct drm_gpu_scheduler *sched, |
| 314 | struct drm_sched_rq *rq) |
| 315 | { |
| 316 | struct rb_node *rb; |
| 317 | |
| 318 | spin_lock(&rq->lock); |
| 319 | for (rb = rb_first_cached(&rq->rb_tree_root); rb; rb = rb_next(rb)) { |
| 320 | struct drm_sched_entity *entity; |
| 321 | |
| 322 | entity = rb_entry(rb, struct drm_sched_entity, rb_tree_node); |
| 323 | if (drm_sched_entity_is_ready(entity)) { |
| 324 | /* If we can't queue yet, preserve the current entity in |
| 325 | * terms of fairness. |
| 326 | */ |
| 327 | if (!drm_sched_can_queue(sched, entity)) { |
| 328 | spin_unlock(&rq->lock); |
| 329 | return ERR_PTR(-ENOSPC); |
| 330 | } |
| 331 | |
| 332 | reinit_completion(&entity->entity_idle); |
| 333 | break; |
| 334 | } |
| 335 | } |
| 336 | spin_unlock(&rq->lock); |
| 337 | |
| 338 | return rb ? rb_entry(rb, struct drm_sched_entity, rb_tree_node) : NULL; |
| 339 | } |
| 340 | |
| 341 | /** |
| 342 | * drm_sched_run_job_queue - enqueue run-job work |
| 343 | * @sched: scheduler instance |
| 344 | */ |
| 345 | static void drm_sched_run_job_queue(struct drm_gpu_scheduler *sched) |
| 346 | { |
| 347 | if (!READ_ONCE(sched->pause_submit)) |
| 348 | queue_work(sched->submit_wq, &sched->work_run_job); |
| 349 | } |
| 350 | |
| 351 | /** |
| 352 | * drm_sched_run_free_queue - enqueue free-job work |
| 353 | * @sched: scheduler instance |
| 354 | */ |
| 355 | static void drm_sched_run_free_queue(struct drm_gpu_scheduler *sched) |
| 356 | { |
| 357 | if (!READ_ONCE(sched->pause_submit)) |
| 358 | queue_work(sched->submit_wq, &sched->work_free_job); |
| 359 | } |
| 360 | |
| 361 | /** |
| 362 | * drm_sched_job_done - complete a job |
| 363 | * @s_job: pointer to the job which is done |
| 364 | * |
| 365 | * Finish the job's fence and resubmit the work items. |
| 366 | */ |
| 367 | static void drm_sched_job_done(struct drm_sched_job *s_job, int result) |
| 368 | { |
| 369 | struct drm_sched_fence *s_fence = s_job->s_fence; |
| 370 | struct drm_gpu_scheduler *sched = s_fence->sched; |
| 371 | |
| 372 | atomic_sub(s_job->credits, &sched->credit_count); |
| 373 | atomic_dec(sched->score); |
| 374 | |
| 375 | trace_drm_sched_job_done(s_fence); |
| 376 | |
| 377 | dma_fence_get(&s_fence->finished); |
| 378 | drm_sched_fence_finished(s_fence, result); |
| 379 | dma_fence_put(&s_fence->finished); |
| 380 | drm_sched_run_free_queue(sched); |
| 381 | } |
| 382 | |
| 383 | /** |
| 384 | * drm_sched_job_done_cb - the callback for a done job |
| 385 | * @f: fence |
| 386 | * @cb: fence callbacks |
| 387 | */ |
| 388 | static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb) |
| 389 | { |
| 390 | struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb); |
| 391 | |
| 392 | drm_sched_job_done(s_job, f->error); |
| 393 | } |
| 394 | |
| 395 | /** |
| 396 | * drm_sched_start_timeout - start timeout for reset worker |
| 397 | * |
| 398 | * @sched: scheduler instance to start the worker for |
| 399 | * |
| 400 | * Start the timeout for the given scheduler. |
| 401 | */ |
| 402 | static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched) |
| 403 | { |
| 404 | lockdep_assert_held(&sched->job_list_lock); |
| 405 | |
| 406 | if (sched->timeout != MAX_SCHEDULE_TIMEOUT && |
| 407 | !list_empty(&sched->pending_list)) |
| 408 | mod_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout); |
| 409 | } |
| 410 | |
| 411 | static void drm_sched_start_timeout_unlocked(struct drm_gpu_scheduler *sched) |
| 412 | { |
| 413 | spin_lock(&sched->job_list_lock); |
| 414 | drm_sched_start_timeout(sched); |
| 415 | spin_unlock(&sched->job_list_lock); |
| 416 | } |
| 417 | |
| 418 | /** |
| 419 | * drm_sched_tdr_queue_imm - immediately start job timeout handler |
| 420 | * |
| 421 | * @sched: scheduler for which the timeout handling should be started. |
| 422 | * |
| 423 | * Start timeout handling immediately for the named scheduler. |
| 424 | */ |
| 425 | void drm_sched_tdr_queue_imm(struct drm_gpu_scheduler *sched) |
| 426 | { |
| 427 | spin_lock(&sched->job_list_lock); |
| 428 | sched->timeout = 0; |
| 429 | drm_sched_start_timeout(sched); |
| 430 | spin_unlock(&sched->job_list_lock); |
| 431 | } |
| 432 | EXPORT_SYMBOL(drm_sched_tdr_queue_imm); |
| 433 | |
| 434 | /** |
| 435 | * drm_sched_fault - immediately start timeout handler |
| 436 | * |
| 437 | * @sched: scheduler where the timeout handling should be started. |
| 438 | * |
| 439 | * Start timeout handling immediately when the driver detects a hardware fault. |
| 440 | */ |
| 441 | void drm_sched_fault(struct drm_gpu_scheduler *sched) |
| 442 | { |
| 443 | if (sched->timeout_wq) |
| 444 | mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0); |
| 445 | } |
| 446 | EXPORT_SYMBOL(drm_sched_fault); |
| 447 | |
| 448 | /** |
| 449 | * drm_sched_suspend_timeout - Suspend scheduler job timeout |
| 450 | * |
| 451 | * @sched: scheduler instance for which to suspend the timeout |
| 452 | * |
| 453 | * Suspend the delayed work timeout for the scheduler. This is done by |
| 454 | * modifying the delayed work timeout to an arbitrarily large value, |
| 455 | * MAX_SCHEDULE_TIMEOUT in this case. |
| 456 | * |
| 457 | * Returns the timeout remaining |
| 458 | * |
| 459 | */ |
| 460 | unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched) |
| 461 | { |
| 462 | unsigned long sched_timeout, now = jiffies; |
| 463 | |
| 464 | sched_timeout = sched->work_tdr.timer.expires; |
| 465 | |
| 466 | /* |
| 467 | * Modify the timeout to an arbitrarily large value. This also prevents |
| 468 | * the timeout from being restarted when new submissions arrive. |
| 469 | */ |
| 470 | if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT) |
| 471 | && time_after(sched_timeout, now)) |
| 472 | return sched_timeout - now; |
| 473 | else |
| 474 | return sched->timeout; |
| 475 | } |
| 476 | EXPORT_SYMBOL(drm_sched_suspend_timeout); |
| 477 | |
| 478 | /** |
| 479 | * drm_sched_resume_timeout - Resume scheduler job timeout |
| 480 | * |
| 481 | * @sched: scheduler instance for which to resume the timeout |
| 482 | * @remaining: remaining timeout |
| 483 | * |
| 484 | * Resume the delayed work timeout for the scheduler. |
| 485 | */ |
| 486 | void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched, |
| 487 | unsigned long remaining) |
| 488 | { |
| 489 | spin_lock(&sched->job_list_lock); |
| 490 | |
| 491 | if (list_empty(&sched->pending_list)) |
| 492 | cancel_delayed_work(&sched->work_tdr); |
| 493 | else |
| 494 | mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining); |
| 495 | |
| 496 | spin_unlock(&sched->job_list_lock); |
| 497 | } |
| 498 | EXPORT_SYMBOL(drm_sched_resume_timeout); |
| 499 | |
| 500 | static void drm_sched_job_begin(struct drm_sched_job *s_job) |
| 501 | { |
| 502 | struct drm_gpu_scheduler *sched = s_job->sched; |
| 503 | |
| 504 | spin_lock(&sched->job_list_lock); |
| 505 | list_add_tail(&s_job->list, &sched->pending_list); |
| 506 | drm_sched_start_timeout(sched); |
| 507 | spin_unlock(&sched->job_list_lock); |
| 508 | } |
| 509 | |
| 510 | /** |
| 511 | * drm_sched_job_reinsert_on_false_timeout - reinsert the job on a false timeout |
| 512 | * @sched: scheduler instance |
| 513 | * @job: job to be reinserted on the pending list |
| 514 | * |
| 515 | * In the case of a "false timeout" - when a timeout occurs but the GPU isn't |
| 516 | * hung and is making progress, the scheduler must reinsert the job back into |
| 517 | * @sched->pending_list. Otherwise, the job and its resources won't be freed |
| 518 | * through the &struct drm_sched_backend_ops.free_job callback. |
| 519 | * |
| 520 | * This function must be used in "false timeout" cases only. |
| 521 | */ |
| 522 | static void drm_sched_job_reinsert_on_false_timeout(struct drm_gpu_scheduler *sched, |
| 523 | struct drm_sched_job *job) |
| 524 | { |
| 525 | spin_lock(&sched->job_list_lock); |
| 526 | list_add(&job->list, &sched->pending_list); |
| 527 | |
| 528 | /* After reinserting the job, the scheduler enqueues the free-job work |
| 529 | * again if ready. Otherwise, a signaled job could be added to the |
| 530 | * pending list, but never freed. |
| 531 | */ |
| 532 | drm_sched_run_free_queue(sched); |
| 533 | spin_unlock(&sched->job_list_lock); |
| 534 | } |
| 535 | |
| 536 | static void drm_sched_job_timedout(struct work_struct *work) |
| 537 | { |
| 538 | struct drm_gpu_scheduler *sched; |
| 539 | struct drm_sched_job *job; |
| 540 | enum drm_gpu_sched_stat status = DRM_GPU_SCHED_STAT_RESET; |
| 541 | |
| 542 | sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work); |
| 543 | |
| 544 | /* Protects against concurrent deletion in drm_sched_get_finished_job */ |
| 545 | spin_lock(&sched->job_list_lock); |
| 546 | job = list_first_entry_or_null(&sched->pending_list, |
| 547 | struct drm_sched_job, list); |
| 548 | |
| 549 | if (job) { |
| 550 | /* |
| 551 | * Remove the bad job so it cannot be freed by a concurrent |
| 552 | * &struct drm_sched_backend_ops.free_job. It will be |
| 553 | * reinserted after the scheduler's work items have been |
| 554 | * cancelled, at which point it's safe. |
| 555 | */ |
| 556 | list_del_init(&job->list); |
| 557 | spin_unlock(&sched->job_list_lock); |
| 558 | |
| 559 | status = job->sched->ops->timedout_job(job); |
| 560 | |
| 561 | /* |
| 562 | * The guilty job did complete and hence needs to be removed manually. |
| 563 | * See the drm_sched_stop() documentation. |
| 564 | */ |
| 565 | if (sched->free_guilty) { |
| 566 | job->sched->ops->free_job(job); |
| 567 | sched->free_guilty = false; |
| 568 | } |
| 569 | |
| 570 | if (status == DRM_GPU_SCHED_STAT_NO_HANG) |
| 571 | drm_sched_job_reinsert_on_false_timeout(sched, job); |
| 572 | } else { |
| 573 | spin_unlock(&sched->job_list_lock); |
| 574 | } |
| 575 | |
| 576 | if (status != DRM_GPU_SCHED_STAT_ENODEV) |
| 577 | drm_sched_start_timeout_unlocked(sched); |
| 578 | } |
| 579 | |
| 580 | /** |
| 581 | * drm_sched_stop - stop the scheduler |
| 582 | * |
| 583 | * @sched: scheduler instance |
| 584 | * @bad: job which caused the time out |
| 585 | * |
| 586 | * Stop the scheduler and also remove and free all completed jobs. |
| 587 | * Note: the bad job will not be freed, as it might be used later, and so it is |
| 588 | * the caller's responsibility to release it manually if it is not part of the |
| 589 | * pending list any more. |
| 590 | * |
| 591 | * This function is typically used for reset recovery (see the documentation of |
| 592 | * drm_sched_backend_ops.timedout_job() for details). Do not call it for |
| 593 | * scheduler teardown, i.e., before calling drm_sched_fini(). |
| 594 | * |
| 595 | * As it's only used for reset recovery, drivers must not call this function |
| 596 | * in their &struct drm_sched_backend_ops.timedout_job callback when they |
| 597 | * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. |
| 598 | */ |
| 599 | void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad) |
| 600 | { |
| 601 | struct drm_sched_job *s_job, *tmp; |
| 602 | |
| 603 | drm_sched_wqueue_stop(sched); |
| 604 | |
| 605 | /* |
| 606 | * Reinsert back the bad job here - now it's safe as |
| 607 | * drm_sched_get_finished_job() cannot race against us and release the |
| 608 | * bad job at this point - we parked (waited for) any in progress |
| 609 | * (earlier) cleanups and drm_sched_get_finished_job() will not be |
| 610 | * called now until the scheduler's work items are submitted again. |
| 611 | */ |
| 612 | if (bad && bad->sched == sched) |
| 613 | /* |
| 614 | * Add at the head of the queue to reflect it was the earliest |
| 615 | * job extracted. |
| 616 | */ |
| 617 | list_add(&bad->list, &sched->pending_list); |
| 618 | |
| 619 | /* |
| 620 | * Iterate the job list from later to earlier ones and either deactivate |
| 621 | * their HW callbacks or remove them from pending list if they already |
| 622 | * signaled. |
| 623 | * This iteration is thread safe as the scheduler's work items have been |
| 624 | * cancelled. |
| 625 | */ |
| 626 | list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list, |
| 627 | list) { |
| 628 | if (s_job->s_fence->parent && |
| 629 | dma_fence_remove_callback(s_job->s_fence->parent, |
| 630 | &s_job->cb)) { |
| 631 | dma_fence_put(s_job->s_fence->parent); |
| 632 | s_job->s_fence->parent = NULL; |
| 633 | atomic_sub(s_job->credits, &sched->credit_count); |
| 634 | } else { |
| 635 | /* |
| 636 | * remove job from pending_list. |
| 637 | * Locking here is for concurrent resume timeout |
| 638 | */ |
| 639 | spin_lock(&sched->job_list_lock); |
| 640 | list_del_init(&s_job->list); |
| 641 | spin_unlock(&sched->job_list_lock); |
| 642 | |
| 643 | /* |
| 644 | * Wait for job's HW fence callback to finish using s_job |
| 645 | * before releasing it. |
| 646 | * |
| 647 | * Job is still alive so fence refcount at least 1 |
| 648 | */ |
| 649 | dma_fence_wait(&s_job->s_fence->finished, false); |
| 650 | |
| 651 | /* |
| 652 | * We must keep bad job alive for later use during |
| 653 | * recovery by some of the drivers but leave a hint |
| 654 | * that the guilty job must be released. |
| 655 | */ |
| 656 | if (bad != s_job) |
| 657 | sched->ops->free_job(s_job); |
| 658 | else |
| 659 | sched->free_guilty = true; |
| 660 | } |
| 661 | } |
| 662 | |
| 663 | /* |
| 664 | * Stop pending timer in flight as we rearm it in drm_sched_start. This |
| 665 | * avoids the pending timeout work in progress to fire right away after |
| 666 | * this TDR finished and before the newly restarted jobs had a |
| 667 | * chance to complete. |
| 668 | */ |
| 669 | cancel_delayed_work(&sched->work_tdr); |
| 670 | } |
| 671 | EXPORT_SYMBOL(drm_sched_stop); |
| 672 | |
| 673 | /** |
| 674 | * drm_sched_start - recover jobs after a reset |
| 675 | * |
| 676 | * @sched: scheduler instance |
| 677 | * @errno: error to set on the pending fences |
| 678 | * |
| 679 | * This function is typically used for reset recovery (see the documentation of |
| 680 | * drm_sched_backend_ops.timedout_job() for details). Do not call it for |
| 681 | * scheduler startup. The scheduler itself is fully operational after |
| 682 | * drm_sched_init() succeeded. |
| 683 | * |
| 684 | * As it's only used for reset recovery, drivers must not call this function |
| 685 | * in their &struct drm_sched_backend_ops.timedout_job callback when they |
| 686 | * skip a reset using &enum drm_gpu_sched_stat.DRM_GPU_SCHED_STAT_NO_HANG. |
| 687 | */ |
| 688 | void drm_sched_start(struct drm_gpu_scheduler *sched, int errno) |
| 689 | { |
| 690 | struct drm_sched_job *s_job, *tmp; |
| 691 | |
| 692 | /* |
| 693 | * Locking the list is not required here as the scheduler's work items |
| 694 | * are currently not running, so no new jobs are being inserted or |
| 695 | * removed. Also concurrent GPU recovers can't run in parallel. |
| 696 | */ |
| 697 | list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { |
| 698 | struct dma_fence *fence = s_job->s_fence->parent; |
| 699 | |
| 700 | atomic_add(s_job->credits, &sched->credit_count); |
| 701 | |
| 702 | if (!fence) { |
| 703 | drm_sched_job_done(s_job, errno ?: -ECANCELED); |
| 704 | continue; |
| 705 | } |
| 706 | |
| 707 | if (dma_fence_add_callback(fence, &s_job->cb, |
| 708 | drm_sched_job_done_cb)) |
| 709 | drm_sched_job_done(s_job, fence->error ?: errno); |
| 710 | } |
| 711 | |
| 712 | drm_sched_start_timeout_unlocked(sched); |
| 713 | drm_sched_wqueue_start(sched); |
| 714 | } |
| 715 | EXPORT_SYMBOL(drm_sched_start); |
| 716 | |
| 717 | /** |
| 718 | * drm_sched_resubmit_jobs - Deprecated, don't use in new code! |
| 719 | * |
| 720 | * @sched: scheduler instance |
| 721 | * |
| 722 | * Re-submitting jobs was a concept AMD came up with as a cheap way to implement |
| 723 | * recovery after a job timeout. |
| 724 | * |
| 725 | * This turned out not to work very well. First of all, there are many |
| 726 | * problems with the dma_fence implementation and requirements. Either the |
| 727 | * implementation is risking deadlocks with core memory management or violating |
| 728 | * documented implementation details of the dma_fence object. |
| 729 | * |
| 730 | * Drivers can still save and restore their state for recovery operations, but |
| 731 | * we shouldn't make this a general scheduler feature around the dma_fence |
| 732 | * interface. |
| 733 | */ |
| 734 | void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched) |
| 735 | { |
| 736 | struct drm_sched_job *s_job, *tmp; |
| 737 | uint64_t guilty_context; |
| 738 | bool found_guilty = false; |
| 739 | struct dma_fence *fence; |
| 740 | |
| 741 | list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) { |
| 742 | struct drm_sched_fence *s_fence = s_job->s_fence; |
| 743 | |
| 744 | if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) { |
| 745 | found_guilty = true; |
| 746 | guilty_context = s_job->s_fence->scheduled.context; |
| 747 | } |
| 748 | |
| 749 | if (found_guilty && s_job->s_fence->scheduled.context == guilty_context) |
| 750 | dma_fence_set_error(&s_fence->finished, -ECANCELED); |
| 751 | |
| 752 | fence = sched->ops->run_job(s_job); |
| 753 | |
| 754 | if (IS_ERR_OR_NULL(fence)) { |
| 755 | if (IS_ERR(fence)) |
| 756 | dma_fence_set_error(&s_fence->finished, PTR_ERR(fence)); |
| 757 | |
| 758 | s_job->s_fence->parent = NULL; |
| 759 | } else { |
| 760 | |
| 761 | s_job->s_fence->parent = dma_fence_get(fence); |
| 762 | |
| 763 | /* Drop for original kref_init */ |
| 764 | dma_fence_put(fence); |
| 765 | } |
| 766 | } |
| 767 | } |
| 768 | EXPORT_SYMBOL(drm_sched_resubmit_jobs); |
| 769 | |
| 770 | /** |
| 771 | * drm_sched_job_init - init a scheduler job |
| 772 | * @job: scheduler job to init |
| 773 | * @entity: scheduler entity to use |
| 774 | * @credits: the number of credits this job contributes to the scheduler's |
| 775 | * credit limit |
| 776 | * @owner: job owner for debugging |
| 777 | * @drm_client_id: &struct drm_file.client_id of the owner (used by trace |
| 778 | * events) |
| 779 | * |
| 780 | * Refer to drm_sched_entity_push_job() documentation |
| 781 | * for locking considerations. |
| 782 | * |
| 783 | * Drivers must make sure to call drm_sched_job_cleanup() if this function returns |
| 784 | * successfully, even when @job is aborted before drm_sched_job_arm() is called. |
| 785 | * |
| 786 | * Note that this function does not assign a valid value to each struct member |
| 787 | * of struct drm_sched_job. Take a look at that struct's documentation to see |
| 788 | * who sets which struct member with what lifetime. |
| 789 | * |
| 790 | * WARNING: amdgpu abuses &drm_sched.ready to signal when the hardware |
| 791 | * has died, which can mean that there's no valid runqueue for an @entity. |
| 792 | * This function returns -ENOENT in this case (which probably should be -EIO as |
| 793 | * a more meaningful return value). |
| 794 | * |
| 795 | * Returns 0 for success, negative error code otherwise. |
| 796 | */ |
| 797 | int drm_sched_job_init(struct drm_sched_job *job, |
| 798 | struct drm_sched_entity *entity, |
| 799 | u32 credits, void *owner, |
| 800 | uint64_t drm_client_id) |
| 801 | { |
| 802 | if (!entity->rq) { |
| 803 | /* This will most likely be followed by missing frames |
| 804 | * or worse--a blank screen--leave a trail in the |
| 805 | * logs, so this can be debugged easier. |
| 806 | */ |
| 807 | dev_err(job->sched->dev, "%s: entity has no rq!\n", __func__); |
| 808 | return -ENOENT; |
| 809 | } |
| 810 | |
| 811 | if (unlikely(!credits)) { |
| 812 | pr_err("*ERROR* %s: credits cannot be 0!\n" , __func__); |
| 813 | return -EINVAL; |
| 814 | } |
| 815 | |
| 816 | /* |
| 817 | * We don't know for sure how the user has allocated. Thus, zero the |
| 818 | * struct so that unallowed (i.e., too early) usage of pointers that |
| 819 | * this function does not set is guaranteed to lead to a NULL pointer |
| 820 | * exception instead of UB. |
| 821 | */ |
| 822 | memset(job, 0, sizeof(*job)); |
| 823 | |
| 824 | job->entity = entity; |
| 825 | job->credits = credits; |
| 826 | job->s_fence = drm_sched_fence_alloc(entity, owner, drm_client_id); |
| 827 | if (!job->s_fence) |
| 828 | return -ENOMEM; |
| 829 | |
| 830 | INIT_LIST_HEAD(&job->list); |
| 831 | |
| 832 | xa_init_flags(&job->dependencies, XA_FLAGS_ALLOC); |
| 833 | |
| 834 | return 0; |
| 835 | } |
| 836 | EXPORT_SYMBOL(drm_sched_job_init); |
| 837 | |
| 838 | /** |
| 839 | * drm_sched_job_arm - arm a scheduler job for execution |
| 840 | * @job: scheduler job to arm |
| 841 | * |
| 842 | * This arms a scheduler job for execution. Specifically it initializes the |
| 843 | * &drm_sched_job.s_fence of @job, so that it can be attached to struct dma_resv |
| 844 | * or other places that need to track the completion of this job. It also |
| 845 | * initializes sequence numbers, which are fundamental for fence ordering. |
| 846 | * |
| 847 | * Refer to drm_sched_entity_push_job() documentation for locking |
| 848 | * considerations. |
| 849 | * |
| 850 | * Once this function has been called, you *must* submit @job with |
| 851 | * drm_sched_entity_push_job(). |
| 852 | * |
| 853 | * This can only be called if drm_sched_job_init() succeeded. |
| 854 | */ |
| 855 | void drm_sched_job_arm(struct drm_sched_job *job) |
| 856 | { |
| 857 | struct drm_gpu_scheduler *sched; |
| 858 | struct drm_sched_entity *entity = job->entity; |
| 859 | |
| 860 | BUG_ON(!entity); |
| 861 | drm_sched_entity_select_rq(entity); |
| 862 | sched = entity->rq->sched; |
| 863 | |
| 864 | job->sched = sched; |
| 865 | job->s_priority = entity->priority; |
| 866 | |
| 867 | drm_sched_fence_init(job->s_fence, job->entity); |
| 868 | } |
| 869 | EXPORT_SYMBOL(drm_sched_job_arm); |
| 870 | |
| 871 | /** |
| 872 | * drm_sched_job_add_dependency - adds the fence as a job dependency |
| 873 | * @job: scheduler job to add the dependencies to |
| 874 | * @fence: the dma_fence to add to the list of dependencies. |
| 875 | * |
| 876 | * Note that @fence is consumed in both the success and error cases. |
| 877 | * |
| 878 | * Returns: |
| 879 | * 0 on success, or an error on failing to expand the array. |
| 880 | */ |
| 881 | int drm_sched_job_add_dependency(struct drm_sched_job *job, |
| 882 | struct dma_fence *fence) |
| 883 | { |
| 884 | struct dma_fence *entry; |
| 885 | unsigned long index; |
| 886 | u32 id = 0; |
| 887 | int ret; |
| 888 | |
| 889 | if (!fence) |
| 890 | return 0; |
| 891 | |
| 892 | /* Deduplicate if we already depend on a fence from the same context. |
| 893 | * This lets the size of the array of deps scale with the number of |
| 894 | * engines involved, rather than the number of BOs. |
| 895 | */ |
| 896 | xa_for_each(&job->dependencies, index, entry) { |
| 897 | if (entry->context != fence->context) |
| 898 | continue; |
| 899 | |
| 900 | if (dma_fence_is_later(fence, entry)) { |
| 901 | dma_fence_put(entry); |
| 902 | xa_store(&job->dependencies, index, fence, GFP_KERNEL); |
| 903 | } else { |
| 904 | dma_fence_put(fence); |
| 905 | } |
| 906 | return 0; |
| 907 | } |
| 908 | |
| 909 | ret = xa_alloc(&job->dependencies, &id, fence, xa_limit_32b, GFP_KERNEL); |
| 910 | if (ret != 0) |
| 911 | dma_fence_put(fence); |
| 912 | |
| 913 | return ret; |
| 914 | } |
| 915 | EXPORT_SYMBOL(drm_sched_job_add_dependency); |
| 916 | |
| 917 | /** |
| 918 | * drm_sched_job_add_syncobj_dependency - adds a syncobj's fence as a job dependency |
| 919 | * @job: scheduler job to add the dependencies to |
| 920 | * @file: drm file private pointer |
| 921 | * @handle: syncobj handle to lookup |
| 922 | * @point: timeline point |
| 923 | * |
| 924 | * This adds the fence matching the given syncobj to @job. |
| 925 | * |
| 926 | * Returns: |
| 927 | * 0 on success, or an error on failing to expand the array. |
| 928 | */ |
| 929 | int drm_sched_job_add_syncobj_dependency(struct drm_sched_job *job, |
| 930 | struct drm_file *file, |
| 931 | u32 handle, |
| 932 | u32 point) |
| 933 | { |
| 934 | struct dma_fence *fence; |
| 935 | int ret; |
| 936 | |
| 937 | ret = drm_syncobj_find_fence(file, handle, point, 0, &fence); |
| 938 | if (ret) |
| 939 | return ret; |
| 940 | |
| 941 | return drm_sched_job_add_dependency(job, fence); |
| 942 | } |
| 943 | EXPORT_SYMBOL(drm_sched_job_add_syncobj_dependency); |
| 944 | |
| 945 | /** |
| 946 | * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job |
| 947 | * @job: scheduler job to add the dependencies to |
| 948 | * @resv: the dma_resv object to get the fences from |
| 949 | * @usage: the dma_resv_usage to use to filter the fences |
| 950 | * |
| 951 | * This adds all fences matching the given usage from @resv to @job. |
| 952 | * Must be called with the @resv lock held. |
| 953 | * |
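|  | * A hedged usage sketch (the bo and job structures below are hypothetical, |
|  | * assuming a job that writes the buffer):: |
|  | * |
|  | *     dma_resv_lock(bo->resv, NULL); |
|  | *     ret = drm_sched_job_add_resv_dependencies(&job->base, bo->resv, |
|  | *                                               dma_resv_usage_rw(true)); |
|  | *     dma_resv_unlock(bo->resv); |
|  | * |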
| 954 | * Returns: |
| 955 | * 0 on success, or an error on failing to expand the array. |
| 956 | */ |
| 957 | int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job, |
| 958 | struct dma_resv *resv, |
| 959 | enum dma_resv_usage usage) |
| 960 | { |
| 961 | struct dma_resv_iter cursor; |
| 962 | struct dma_fence *fence; |
| 963 | int ret; |
| 964 | |
| 965 | dma_resv_assert_held(resv); |
| 966 | |
| 967 | dma_resv_for_each_fence(&cursor, resv, usage, fence) { |
| 968 | /* |
| 969 | * As drm_sched_job_add_dependency always consumes the fence |
| 970 | * reference (even when it fails), and dma_resv_for_each_fence |
| 971 | * is not obtaining one, we need to grab one before calling. |
| 972 | */ |
| 973 | ret = drm_sched_job_add_dependency(job, dma_fence_get(fence)); |
| 974 | if (ret) |
| 975 | return ret; |
| 976 | } |
| 977 | return 0; |
| 978 | } |
| 979 | EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies); |
| 980 | |
| 981 | /** |
| 982 | * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job |
| 983 | * dependencies |
| 984 | * @job: scheduler job to add the dependencies to |
| 985 | * @obj: the gem object to add new dependencies from. |
| 986 | * @write: whether the job might write the object (so we need to depend on |
| 987 | * shared fences in the reservation object). |
| 988 | * |
| 989 | * This should be called after drm_gem_lock_reservations() on your array of |
| 990 | * GEM objects used in the job but before updating the reservations with your |
| 991 | * own fences. |
| 992 | * |
| 993 | * Returns: |
| 994 | * 0 on success, or an error on failing to expand the array. |
| 995 | */ |
| 996 | int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job, |
| 997 | struct drm_gem_object *obj, |
| 998 | bool write) |
| 999 | { |
| 1000 | return drm_sched_job_add_resv_dependencies(job, obj->resv, |
| 1001 | dma_resv_usage_rw(write)); |
| 1002 | } |
| 1003 | EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies); |
| 1004 | |
| 1005 | /** |
| 1006 | * drm_sched_job_has_dependency - check whether fence is the job's dependency |
| 1007 | * @job: scheduler job to check |
| 1008 | * @fence: fence to look for |
| 1009 | * |
| 1010 | * Returns: |
| 1011 | * True if @fence is found within the job's dependencies, or otherwise false. |
| 1012 | */ |
| 1013 | bool drm_sched_job_has_dependency(struct drm_sched_job *job, |
| 1014 | struct dma_fence *fence) |
| 1015 | { |
| 1016 | struct dma_fence *f; |
| 1017 | unsigned long index; |
| 1018 | |
| 1019 | xa_for_each(&job->dependencies, index, f) { |
| 1020 | if (f == fence) |
| 1021 | return true; |
| 1022 | } |
| 1023 | |
| 1024 | return false; |
| 1025 | } |
| 1026 | EXPORT_SYMBOL(drm_sched_job_has_dependency); |
| 1027 | |
| 1028 | /** |
| 1029 | * drm_sched_job_cleanup - clean up scheduler job resources |
| 1030 | * @job: scheduler job to clean up |
| 1031 | * |
| 1032 | * Cleans up the resources allocated with drm_sched_job_init(). |
| 1033 | * |
| 1034 | * Drivers should call this from their error unwind code if @job is aborted |
| 1035 | * before drm_sched_job_arm() is called. |
| 1036 | * |
| 1037 | * drm_sched_job_arm() is a point of no return since it initializes the fences |
| 1038 | * and their sequence number etc. Once that function has been called, you *must* |
| 1039 | * submit it with drm_sched_entity_push_job() and cannot simply abort it by |
| 1040 | * calling drm_sched_job_cleanup(). |
| 1041 | * |
| 1042 | * This function should be called in the &drm_sched_backend_ops.free_job callback. |
| 1043 | */ |
| 1044 | void drm_sched_job_cleanup(struct drm_sched_job *job) |
| 1045 | { |
| 1046 | struct dma_fence *fence; |
| 1047 | unsigned long index; |
| 1048 | |
| 1049 | if (kref_read(&job->s_fence->finished.refcount)) { |
| 1050 | /* The job has been processed by the scheduler, i.e., |
| 1051 | * drm_sched_job_arm() and drm_sched_entity_push_job() have |
| 1052 | * been called. |
| 1053 | */ |
| 1054 | dma_fence_put(&job->s_fence->finished); |
| 1055 | } else { |
| 1056 | /* The job was aborted before it has been committed to be run; |
| 1057 | * notably, drm_sched_job_arm() has not been called. |
| 1058 | */ |
| 1059 | drm_sched_fence_free(job->s_fence); |
| 1060 | } |
| 1061 | |
| 1062 | job->s_fence = NULL; |
| 1063 | |
| 1064 | xa_for_each(&job->dependencies, index, fence) { |
| 1065 | dma_fence_put(fence); |
| 1066 | } |
| 1067 | xa_destroy(&job->dependencies); |
| 1068 | |
| 1069 | } |
| 1070 | EXPORT_SYMBOL(drm_sched_job_cleanup); |
| 1071 | |
| 1072 | /** |
| 1073 | * drm_sched_wakeup - Wake up the scheduler if it is ready to queue |
| 1074 | * @sched: scheduler instance |
| 1075 | * |
| 1076 | * Wake up the scheduler if we can queue jobs. |
| 1077 | */ |
| 1078 | void drm_sched_wakeup(struct drm_gpu_scheduler *sched) |
| 1079 | { |
| 1080 | drm_sched_run_job_queue(sched); |
| 1081 | } |
| 1082 | |
| 1083 | /** |
| 1084 | * drm_sched_select_entity - Select next entity to process |
| 1085 | * |
| 1086 | * @sched: scheduler instance |
| 1087 | * |
| 1088 | * Return an entity to process or NULL if none are found. |
| 1089 | * |
| 1090 | * Note that we break out of the for-loop when "entity" is non-NULL, which can |
| 1091 | * also be an error-pointer--this assures we don't process lower priority |
| 1092 | * run-queues. See comments in the respectively called functions. |
| 1093 | */ |
| 1094 | static struct drm_sched_entity * |
| 1095 | drm_sched_select_entity(struct drm_gpu_scheduler *sched) |
| 1096 | { |
| 1097 | struct drm_sched_entity *entity; |
| 1098 | int i; |
| 1099 | |
| 1100 | /* Start with the highest priority. |
| 1101 | */ |
| 1102 | for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { |
| 1103 | entity = drm_sched_policy == DRM_SCHED_POLICY_FIFO ? |
| 1104 | drm_sched_rq_select_entity_fifo(sched, sched->sched_rq[i]) : |
| 1105 | drm_sched_rq_select_entity_rr(sched, sched->sched_rq[i]); |
| 1106 | if (entity) |
| 1107 | break; |
| 1108 | } |
| 1109 | |
| 1110 | return IS_ERR(entity) ? NULL : entity; |
| 1111 | } |
| 1112 | |
| 1113 | /** |
| 1114 | * drm_sched_get_finished_job - fetch the next finished job to be destroyed |
| 1115 | * |
| 1116 | * @sched: scheduler instance |
| 1117 | * @have_more: are there more finished jobs on the list |
| 1118 | * |
| 1119 | * Informs the caller through @have_more whether there are more finished jobs |
| 1120 | * besides the returned one. |
| 1121 | * |
| 1122 | * Returns the next finished job from the pending list (if there is one) |
| 1123 | * ready for it to be destroyed. |
| 1124 | */ |
| 1125 | static struct drm_sched_job * |
| 1126 | drm_sched_get_finished_job(struct drm_gpu_scheduler *sched, bool *have_more) |
| 1127 | { |
| 1128 | struct drm_sched_job *job, *next; |
| 1129 | |
| 1130 | spin_lock(&sched->job_list_lock); |
| 1131 | |
| 1132 | job = list_first_entry_or_null(&sched->pending_list, |
| 1133 | struct drm_sched_job, list); |
| 1134 | if (job && dma_fence_is_signaled(&job->s_fence->finished)) { |
| 1135 | /* remove job from pending_list */ |
| 1136 | list_del_init(&job->list); |
| 1137 | |
| 1138 | /* cancel this job's TO timer */ |
| 1139 | cancel_delayed_work(&sched->work_tdr); |
| 1140 | |
| 1141 | *have_more = false; |
| 1142 | next = list_first_entry_or_null(&sched->pending_list, |
| 1143 | typeof(*next), list); |
| 1144 | if (next) { |
| 1145 | /* make the scheduled timestamp more accurate */ |
| 1146 | if (test_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, |
| 1147 | &next->s_fence->scheduled.flags)) |
| 1148 | next->s_fence->scheduled.timestamp = |
| 1149 | dma_fence_timestamp(&job->s_fence->finished); |
| 1150 | |
| 1151 | *have_more = dma_fence_is_signaled(&next->s_fence->finished); |
| 1152 | |
| 1153 | /* start TO timer for next job */ |
| 1154 | drm_sched_start_timeout(sched); |
| 1155 | } |
| 1156 | } else { |
| 1157 | job = NULL; |
| 1158 | } |
| 1159 | |
| 1160 | spin_unlock(&sched->job_list_lock); |
| 1161 | |
| 1162 | return job; |
| 1163 | } |
| 1164 | |
| 1165 | /** |
| 1166 | * drm_sched_pick_best - Get a drm sched from a sched_list with the least load |
| 1167 | * @sched_list: list of drm_gpu_schedulers |
| 1168 | * @num_sched_list: number of drm_gpu_schedulers in the sched_list |
| 1169 | * |
| 1170 | * Returns pointer of the sched with the least load or NULL if none of the |
| 1171 | * drm_gpu_schedulers are ready |
| 1172 | */ |
| 1173 | struct drm_gpu_scheduler * |
| 1174 | drm_sched_pick_best(struct drm_gpu_scheduler **sched_list, |
| 1175 | unsigned int num_sched_list) |
| 1176 | { |
| 1177 | struct drm_gpu_scheduler *sched, *picked_sched = NULL; |
| 1178 | int i; |
| 1179 | unsigned int min_score = UINT_MAX, num_score; |
| 1180 | |
| 1181 | for (i = 0; i < num_sched_list; ++i) { |
| 1182 | sched = sched_list[i]; |
| 1183 | |
| 1184 | if (!sched->ready) { |
| 1185 | DRM_WARN("scheduler %s is not ready, skipping" , |
| 1186 | sched->name); |
| 1187 | continue; |
| 1188 | } |
| 1189 | |
| 1190 | num_score = atomic_read(sched->score); |
| 1191 | if (num_score < min_score) { |
| 1192 | min_score = num_score; |
| 1193 | picked_sched = sched; |
| 1194 | } |
| 1195 | } |
| 1196 | |
| 1197 | return picked_sched; |
| 1198 | } |
| 1199 | EXPORT_SYMBOL(drm_sched_pick_best); |
| 1200 | |
| 1201 | /** |
| 1202 | * drm_sched_free_job_work - worker to call free_job |
| 1203 | * |
| 1204 | * @w: free job work |
| 1205 | */ |
| 1206 | static void drm_sched_free_job_work(struct work_struct *w) |
| 1207 | { |
| 1208 | struct drm_gpu_scheduler *sched = |
| 1209 | container_of(w, struct drm_gpu_scheduler, work_free_job); |
| 1210 | struct drm_sched_job *job; |
| 1211 | bool have_more; |
| 1212 | |
| 1213 | job = drm_sched_get_finished_job(sched, &have_more); |
| 1214 | if (job) { |
| 1215 | sched->ops->free_job(job); |
| 1216 | if (have_more) |
| 1217 | drm_sched_run_free_queue(sched); |
| 1218 | } |
| 1219 | |
| 1220 | drm_sched_run_job_queue(sched); |
| 1221 | } |
| 1222 | |
| 1223 | /** |
| 1224 | * drm_sched_run_job_work - worker to call run_job |
| 1225 | * |
| 1226 | * @w: run job work |
| 1227 | */ |
| 1228 | static void drm_sched_run_job_work(struct work_struct *w) |
| 1229 | { |
| 1230 | struct drm_gpu_scheduler *sched = |
| 1231 | container_of(w, struct drm_gpu_scheduler, work_run_job); |
| 1232 | struct drm_sched_entity *entity; |
| 1233 | struct dma_fence *fence; |
| 1234 | struct drm_sched_fence *s_fence; |
| 1235 | struct drm_sched_job *sched_job; |
| 1236 | int r; |
| 1237 | |
| 1238 | /* Find entity with a ready job */ |
| 1239 | entity = drm_sched_select_entity(sched); |
| 1240 | if (!entity) { |
| 1241 | /* |
| 1242 | * Either no more work to do, or the next ready job needs more |
| 1243 | * credits than the scheduler has currently available. |
| 1244 | */ |
| 1245 | return; |
| 1246 | } |
| 1247 | |
| 1248 | sched_job = drm_sched_entity_pop_job(entity); |
| 1249 | if (!sched_job) { |
| 1250 | complete_all(&entity->entity_idle); |
| 1251 | drm_sched_run_job_queue(sched); |
| 1252 | return; |
| 1253 | } |
| 1254 | |
| 1255 | s_fence = sched_job->s_fence; |
| 1256 | |
| 1257 | atomic_add(sched_job->credits, &sched->credit_count); |
| 1258 | drm_sched_job_begin(sched_job); |
| 1259 | |
| 1260 | trace_drm_sched_job_run(sched_job, entity); |
| 1261 | /* |
| 1262 | * The run_job() callback must by definition return a fence whose |
| 1263 | * refcount has been incremented for the scheduler already. |
| 1264 | */ |
| 1265 | fence = sched->ops->run_job(sched_job); |
| 1266 | complete_all(&entity->entity_idle); |
| 1267 | drm_sched_fence_scheduled(s_fence, fence); |
| 1268 | |
| 1269 | if (!IS_ERR_OR_NULL(fence)) { |
| 1270 | r = dma_fence_add_callback(fence, &sched_job->cb, |
| 1271 | drm_sched_job_done_cb); |
| 1272 | if (r == -ENOENT) |
| 1273 | drm_sched_job_done(sched_job, fence->error); |
| 1274 | else if (r) |
| 1275 | DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", r); |
| 1276 | |
| 1277 | dma_fence_put(fence); |
| 1278 | } else { |
| 1279 | drm_sched_job_done(sched_job, IS_ERR(fence) ? |
| 1280 | PTR_ERR(fence) : 0); |
| 1281 | } |
| 1282 | |
| 1283 | wake_up(&sched->job_scheduled); |
| 1284 | drm_sched_run_job_queue(sched); |
| 1285 | } |
| 1286 | |
| 1287 | static struct workqueue_struct *drm_sched_alloc_wq(const char *name) |
| 1288 | { |
| 1289 | #if (IS_ENABLED(CONFIG_LOCKDEP)) |
| 1290 | static struct lockdep_map map = { |
| 1291 | .name = "drm_sched_lockdep_map" |
| 1292 | }; |
| 1293 | |
| 1294 | /* |
| 1295 | * Avoid leaking a lockdep map on each drm sched creation and |
| 1296 | * destruction by using a single lockdep map for all drm sched |
| 1297 | * allocated submit_wq. |
| 1298 | */ |
| 1299 | |
| 1300 | return alloc_ordered_workqueue_lockdep_map(name, WQ_MEM_RECLAIM, &map); |
| 1301 | #else |
| 1302 | return alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); |
| 1303 | #endif |
| 1304 | } |
| 1305 | |
| 1306 | /** |
| 1307 | * drm_sched_init - Init a gpu scheduler instance |
| 1308 | * |
| 1309 | * @sched: scheduler instance |
| 1310 | * @args: scheduler initialization arguments |
| 1311 | * |
| 1312 | * Return 0 on success, otherwise error code. |
| 1313 | */ |
| 1314 | int drm_sched_init(struct drm_gpu_scheduler *sched, const struct drm_sched_init_args *args) |
| 1315 | { |
| 1316 | int i; |
| 1317 | |
| 1318 | sched->ops = args->ops; |
| 1319 | sched->credit_limit = args->credit_limit; |
| 1320 | sched->name = args->name; |
| 1321 | sched->timeout = args->timeout; |
| 1322 | sched->hang_limit = args->hang_limit; |
| 1323 | sched->timeout_wq = args->timeout_wq ? args->timeout_wq : system_percpu_wq; |
| 1324 | sched->score = args->score ? args->score : &sched->_score; |
| 1325 | sched->dev = args->dev; |
| 1326 | |
| 1327 | if (args->num_rqs > DRM_SCHED_PRIORITY_COUNT) { |
| 1328 | /* This is a gross violation--tell drivers what the problem is. |
| 1329 | */ |
| 1330 | dev_err(sched->dev, "%s: num_rqs cannot be greater than DRM_SCHED_PRIORITY_COUNT\n" , |
| 1331 | __func__); |
| 1332 | return -EINVAL; |
| 1333 | } else if (sched->sched_rq) { |
| 1334 | /* Not an error, but warn anyway so drivers can |
| 1335 | * fine-tune their DRM calling order, and return all |
| 1336 | * is good. |
| 1337 | */ |
| 1338 | dev_warn(sched->dev, "%s: scheduler already initialized!\n" , __func__); |
| 1339 | return 0; |
| 1340 | } |
| 1341 | |
| 1342 | if (args->submit_wq) { |
| 1343 | sched->submit_wq = args->submit_wq; |
| 1344 | sched->own_submit_wq = false; |
| 1345 | } else { |
| 1346 | sched->submit_wq = drm_sched_alloc_wq(args->name); |
| 1347 | if (!sched->submit_wq) |
| 1348 | return -ENOMEM; |
| 1349 | |
| 1350 | sched->own_submit_wq = true; |
| 1351 | } |
| 1352 | |
| 1353 | sched->sched_rq = kmalloc_array(args->num_rqs, sizeof(*sched->sched_rq), |
| 1354 | GFP_KERNEL | __GFP_ZERO); |
| 1355 | if (!sched->sched_rq) |
| 1356 | goto Out_check_own; |
| 1357 | sched->num_rqs = args->num_rqs; |
| 1358 | for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { |
| 1359 | sched->sched_rq[i] = kzalloc(sizeof(*sched->sched_rq[i]), GFP_KERNEL); |
| 1360 | if (!sched->sched_rq[i]) |
| 1361 | goto Out_unroll; |
| 1362 | drm_sched_rq_init(sched, sched->sched_rq[i]); |
| 1363 | } |
| 1364 | |
| 1365 | init_waitqueue_head(&sched->job_scheduled); |
| 1366 | INIT_LIST_HEAD(&sched->pending_list); |
| 1367 | spin_lock_init(&sched->job_list_lock); |
| 1368 | atomic_set(&sched->credit_count, 0); |
| 1369 | INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout); |
| 1370 | INIT_WORK(&sched->work_run_job, drm_sched_run_job_work); |
| 1371 | INIT_WORK(&sched->work_free_job, drm_sched_free_job_work); |
| 1372 | atomic_set(&sched->_score, 0); |
| 1373 | atomic64_set(&sched->job_id_count, 0); |
| 1374 | sched->pause_submit = false; |
| 1375 | |
| 1376 | sched->ready = true; |
| 1377 | return 0; |
| 1378 | Out_unroll: |
| 1379 | for (--i ; i >= DRM_SCHED_PRIORITY_KERNEL; i--) |
| 1380 | kfree(sched->sched_rq[i]); |
| 1381 | |
| 1382 | kfree(sched->sched_rq); |
| 1383 | sched->sched_rq = NULL; |
| 1384 | Out_check_own: |
| 1385 | if (sched->own_submit_wq) |
| 1386 | destroy_workqueue(sched->submit_wq); |
| 1387 | dev_err(sched->dev, "%s: Failed to setup GPU scheduler--out of memory\n", __func__); |
| 1388 | return -ENOMEM; |
| 1389 | } |
| 1390 | EXPORT_SYMBOL(drm_sched_init); |
| 1391 | |
| 1392 | static void drm_sched_cancel_remaining_jobs(struct drm_gpu_scheduler *sched) |
| 1393 | { |
| 1394 | struct drm_sched_job *job, *tmp; |
| 1395 | |
| 1396 | /* All other accessors are stopped. No locking necessary. */ |
| 1397 | list_for_each_entry_safe_reverse(job, tmp, &sched->pending_list, list) { |
| 1398 | sched->ops->cancel_job(job); |
| 1399 | list_del(&job->list); |
| 1400 | sched->ops->free_job(job); |
| 1401 | } |
| 1402 | } |
| 1403 | |
| 1404 | /** |
| 1405 | * drm_sched_fini - Destroy a gpu scheduler |
| 1406 | * |
| 1407 | * @sched: scheduler instance |
| 1408 | * |
| 1409 | * Tears down and cleans up the scheduler. |
| 1410 | * |
| 1411 | * This stops submission of new jobs to the hardware through &struct |
| 1412 | * drm_sched_backend_ops.run_job. If &struct drm_sched_backend_ops.cancel_job |
| 1413 | * is implemented, all jobs will be canceled through it and afterwards cleaned |
| 1414 | * up through &struct drm_sched_backend_ops.free_job. If cancel_job is not |
| 1415 | * implemented, memory could leak. |
| 1416 | */ |
| 1417 | void drm_sched_fini(struct drm_gpu_scheduler *sched) |
| 1418 | { |
| 1419 | struct drm_sched_entity *s_entity; |
| 1420 | int i; |
| 1421 | |
| 1422 | drm_sched_wqueue_stop(sched); |
| 1423 | |
| 1424 | for (i = DRM_SCHED_PRIORITY_KERNEL; i < sched->num_rqs; i++) { |
| 1425 | struct drm_sched_rq *rq = sched->sched_rq[i]; |
| 1426 | |
| 1427 | spin_lock(&rq->lock); |
| 1428 | list_for_each_entry(s_entity, &rq->entities, list) { |
| 1429 | /* |
| 1430 | * Prevents reinsertion and marks job_queue as idle; it |
| 1431 | * it will be removed from the rq in drm_sched_entity_fini() |
| 1432 | * eventually |
| 1433 | * |
| 1434 | * FIXME: |
| 1435 | * This lacks the proper spin_lock(&s_entity->lock) and |
| 1436 | * is, therefore, a race condition. Most notably, it |
| 1437 | * can race with drm_sched_entity_push_job(). The lock |
| 1438 | * cannot be taken here, however, because this would |
| 1439 | * lead to lock inversion -> deadlock. |
| 1440 | * |
| 1441 | * The best solution probably is to enforce the life |
| 1442 | * time rule of all entities having to be torn down |
| 1443 | * before their scheduler. Then, however, locking could |
| 1444 | * be dropped altogether from this function. |
| 1445 | * |
| 1446 | * For now, this remains a potential race in all |
| 1447 | * drivers that keep entities alive for longer than |
| 1448 | * the scheduler. |
| 1449 | * |
| 1450 | * The READ_ONCE() is there to make the lockless read |
| 1451 | * (warning about the lockless write below) slightly |
| 1452 | * less broken... |
| 1453 | */ |
| 1454 | if (!READ_ONCE(s_entity->stopped)) |
| 1455 | dev_warn(sched->dev, "Tearing down scheduler with active entities!\n" ); |
| 1456 | s_entity->stopped = true; |
| 1457 | } |
| 1458 | spin_unlock(&rq->lock); |
| 1459 | kfree(sched->sched_rq[i]); |
| 1460 | } |
| 1461 | |
| 1462 | /* Wakeup everyone stuck in drm_sched_entity_flush for this scheduler */ |
| 1463 | wake_up_all(&sched->job_scheduled); |
| 1464 | |
| 1465 | /* Confirm no work left behind accessing device structures */ |
| 1466 | cancel_delayed_work_sync(&sched->work_tdr); |
| 1467 | |
| 1468 | /* Avoid memory leaks if supported by the driver. */ |
| 1469 | if (sched->ops->cancel_job) |
| 1470 | drm_sched_cancel_remaining_jobs(sched); |
| 1471 | |
| 1472 | if (sched->own_submit_wq) |
| 1473 | destroy_workqueue(sched->submit_wq); |
| 1474 | sched->ready = false; |
| 1475 | kfree(sched->sched_rq); |
| 1476 | sched->sched_rq = NULL; |
| 1477 | |
| 1478 | if (!list_empty(&sched->pending_list)) |
| 1479 | dev_warn(sched->dev, "Tearing down scheduler while jobs are pending!\n"); |
| 1480 | } |
| 1481 | EXPORT_SYMBOL(drm_sched_fini); |
| 1482 | |
| 1483 | /** |
| 1484 | * drm_sched_increase_karma - Update sched_entity guilty flag |
| 1485 | * |
| 1486 | * @bad: The job guilty of time out |
| 1487 | * |
| 1488 | * Increment on every hang caused by the 'bad' job. If this exceeds the hang |
| 1489 | * limit of the scheduler then the respective sched entity is marked guilty and |
| 1490 | * jobs from it will not be scheduled further |
| 1491 | */ |
| 1492 | void drm_sched_increase_karma(struct drm_sched_job *bad) |
| 1493 | { |
| 1494 | int i; |
| 1495 | struct drm_sched_entity *tmp; |
| 1496 | struct drm_sched_entity *entity; |
| 1497 | struct drm_gpu_scheduler *sched = bad->sched; |
| 1498 | |
| 1499 | /* don't change @bad's karma if it's from the KERNEL RQ, because a GPU hang |
| 1500 | * can sometimes corrupt kernel jobs (like VM updating jobs), but keep in |
| 1501 | * mind that kernel jobs are always considered good. |
| 1502 | */ |
| 1503 | if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) { |
| 1504 | atomic_inc(&bad->karma); |
| 1505 | |
| 1506 | for (i = DRM_SCHED_PRIORITY_HIGH; i < sched->num_rqs; i++) { |
| 1507 | struct drm_sched_rq *rq = sched->sched_rq[i]; |
| 1508 | |
| 1509 | spin_lock(&rq->lock); |
| 1510 | list_for_each_entry_safe(entity, tmp, &rq->entities, list) { |
| 1511 | if (bad->s_fence->scheduled.context == |
| 1512 | entity->fence_context) { |
| 1513 | if (entity->guilty) |
| 1514 | atomic_set(entity->guilty, 1); |
| 1515 | break; |
| 1516 | } |
| 1517 | } |
| 1518 | spin_unlock(&rq->lock); |
| 1519 | if (&entity->list != &rq->entities) |
| 1520 | break; |
| 1521 | } |
| 1522 | } |
| 1523 | } |
| 1524 | EXPORT_SYMBOL(drm_sched_increase_karma); |
| 1525 | |
| 1526 | /** |
| 1527 | * drm_sched_wqueue_ready - Is the scheduler ready for submission |
| 1528 | * |
| 1529 | * @sched: scheduler instance |
| 1530 | * |
| 1531 | * Returns true if submission is ready |
| 1532 | */ |
| 1533 | bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched) |
| 1534 | { |
| 1535 | return sched->ready; |
| 1536 | } |
| 1537 | EXPORT_SYMBOL(drm_sched_wqueue_ready); |
| 1538 | |
| 1539 | /** |
| 1540 | * drm_sched_wqueue_stop - stop scheduler submission |
| 1541 | * @sched: scheduler instance |
| 1542 | * |
| 1543 | * Stops the scheduler from pulling new jobs from entities. It also stops |
| 1544 | * freeing jobs automatically through drm_sched_backend_ops.free_job(). |
| 1545 | */ |
| 1546 | void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched) |
| 1547 | { |
| 1548 | WRITE_ONCE(sched->pause_submit, true); |
| 1549 | cancel_work_sync(&sched->work_run_job); |
| 1550 | cancel_work_sync(&sched->work_free_job); |
| 1551 | } |
| 1552 | EXPORT_SYMBOL(drm_sched_wqueue_stop); |
| 1553 | |
| 1554 | /** |
| 1555 | * drm_sched_wqueue_start - start scheduler submission |
| 1556 | * @sched: scheduler instance |
| 1557 | * |
| 1558 | * Restarts the scheduler after drm_sched_wqueue_stop() has stopped it. |
| 1559 | * |
| 1560 | * This function is not necessary for 'conventional' startup. The scheduler is |
| 1561 | * fully operational after drm_sched_init() succeeded. |
| 1562 | */ |
| 1563 | void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched) |
| 1564 | { |
| 1565 | WRITE_ONCE(sched->pause_submit, false); |
| 1566 | queue_work(sched->submit_wq, &sched->work_run_job); |
| 1567 | queue_work(sched->submit_wq, &sched->work_free_job); |
| 1568 | } |
| 1569 | EXPORT_SYMBOL(drm_sched_wqueue_start); |
| 1570 | |