| 1 | // SPDX-License-Identifier: GPL-2.0 or MIT |
| 2 | /* Copyright 2023 Collabora ltd. */ |
| 3 | |
| 4 | #include <drm/drm_drv.h> |
| 5 | #include <drm/drm_exec.h> |
| 6 | #include <drm/drm_gem_shmem_helper.h> |
| 7 | #include <drm/drm_managed.h> |
| 8 | #include <drm/drm_print.h> |
| 9 | #include <drm/gpu_scheduler.h> |
| 10 | #include <drm/panthor_drm.h> |
| 11 | |
| 12 | #include <linux/build_bug.h> |
| 13 | #include <linux/cleanup.h> |
| 14 | #include <linux/clk.h> |
| 15 | #include <linux/delay.h> |
| 16 | #include <linux/dma-mapping.h> |
| 17 | #include <linux/dma-resv.h> |
| 18 | #include <linux/firmware.h> |
| 19 | #include <linux/interrupt.h> |
| 20 | #include <linux/io.h> |
| 21 | #include <linux/iopoll.h> |
| 22 | #include <linux/iosys-map.h> |
| 23 | #include <linux/module.h> |
| 24 | #include <linux/platform_device.h> |
| 25 | #include <linux/pm_runtime.h> |
| 26 | |
| 27 | #include "panthor_devfreq.h" |
| 28 | #include "panthor_device.h" |
| 29 | #include "panthor_fw.h" |
| 30 | #include "panthor_gem.h" |
| 31 | #include "panthor_gpu.h" |
| 32 | #include "panthor_heap.h" |
| 33 | #include "panthor_mmu.h" |
| 34 | #include "panthor_regs.h" |
| 35 | #include "panthor_sched.h" |
| 36 | |
| 37 | /** |
| 38 | * DOC: Scheduler |
| 39 | * |
| 40 | * Mali CSF hardware adopts a firmware-assisted scheduling model, where |
| 41 | * the firmware takes care of scheduling aspects, to some extent. |
| 42 | * |
| 43 | * The scheduling happens at the scheduling group level: each group
| 44 | * contains 1 to N queues (N is FW/hardware dependent, and exposed |
| 45 | * through the firmware interface). Each queue is assigned a command |
| 46 | * stream ring buffer, which serves as a way to get jobs submitted to |
| 47 | * the GPU, among other things. |
| 48 | * |
| 49 | * The firmware can schedule a maximum of M groups (M is FW/hardware |
| 50 | * dependent, and exposed through the firmware interface). Past
| 51 | * this maximum number of groups, the kernel must take care of
| 52 | * rotating the groups passed to the firmware so every group gets
| 53 | * a chance to have its queues scheduled for execution.
| 54 | * |
| 55 | * The current implementation only supports kernel-mode queues.
| 56 | * In other words, userspace doesn't have access to the ring-buffer.
| 57 | * Instead, userspace passes indirect command stream buffers that are |
| 58 | * called from the queue ring-buffer by the kernel using a pre-defined |
| 59 | * sequence of command stream instructions to ensure the userspace driver |
| 60 | * always gets consistent results (cache maintenance, |
| 61 | * synchronization, ...). |
| 62 | * |
| 63 | * We rely on the drm_gpu_scheduler framework to deal with job |
| 64 | * dependencies and submission. As with any other driver dealing with a
| 65 | * FW scheduler, we use the 1:1 entity:scheduler mode, such that each
| 66 | * entity has its own job scheduler. When a job is ready to be executed |
| 67 | * (all its dependencies are met), it is pushed to the appropriate |
| 68 | * queue ring-buffer, and the group is scheduled for execution if it |
| 69 | * wasn't already active. |
| 70 | * |
| 71 | * Kernel-side group scheduling is timeslice-based. When we have fewer
| 72 | * groups than there are slots, the periodic tick is disabled and we
| 73 | * just let the FW schedule the active groups. When there are more
| 74 | * groups than slots, we give each group a chance to execute for
| 75 | * a given amount of time, and then re-evaluate and pick new groups |
| 76 | * to schedule. The group selection algorithm is based on |
| 77 | * priority+round-robin. |
| 78 | * |
| 79 | * Even though user-mode queues are out of scope right now, the
| 80 | * current design takes them into account by avoiding any guess on the
| 81 | * group/queue state that would be based on information we wouldn't have
| 82 | * if userspace were in charge of the ring-buffer. That's also one of the
| 83 | * reasons we don't do 'cooperative' scheduling (encoding FW group slot
| 84 | * reservation as dma_fence that would be returned from the |
| 85 | * drm_sched_backend_ops::prepare_job() hook, and treating group rotation as
| 86 | * a queue of waiters, ordered by job submission order). This approach |
| 87 | * would work for kernel-mode queues, but would make user-mode queues a |
| 88 | * lot more complicated to retrofit. |
| 89 | */ |
| 90 | |
| 91 | #define JOB_TIMEOUT_MS 5000 |
| 92 | |
| 93 | #define MAX_CSG_PRIO 0xf |
| 94 | |
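|  | /* Each FW command stream instruction is 8 bytes (a u64), so a 64-byte
|  | * cache line holds 8 of them. MAX_INSTRS_PER_JOB is the upper bound on
|  | * the number of ring-buffer instructions emitted for a single job.
|  | */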
| 95 | #define NUM_INSTRS_PER_CACHE_LINE (64 / sizeof(u64)) |
| 96 | #define MAX_INSTRS_PER_JOB 24 |
| 97 | |
| 98 | struct panthor_group; |
| 99 | |
| 100 | /** |
| 101 | * struct panthor_csg_slot - Command stream group slot |
| 102 | * |
| 103 | * This represents a FW slot for a scheduling group. |
| 104 | */ |
| 105 | struct panthor_csg_slot { |
| 106 | /** @group: Scheduling group bound to this slot. */ |
| 107 | struct panthor_group *group; |
| 108 | |
| 109 | /** @priority: Group priority. */ |
| 110 | u8 priority; |
| 111 | |
| 112 | /** |
| 113 | * @idle: True if the group bound to this slot is idle. |
| 114 | * |
| 115 | * A group is idle when it has nothing waiting for execution on |
| 116 | * all its queues, or when queues are blocked waiting for something |
| 117 | * to happen (synchronization object). |
| 118 | */ |
| 119 | bool idle; |
| 120 | }; |
| 121 | |
| 122 | /** |
| 123 | * enum panthor_csg_priority - Group priority |
| 124 | */ |
| 125 | enum panthor_csg_priority { |
| 126 | /** @PANTHOR_CSG_PRIORITY_LOW: Low priority group. */ |
| 127 | PANTHOR_CSG_PRIORITY_LOW = 0, |
| 128 | |
| 129 | /** @PANTHOR_CSG_PRIORITY_MEDIUM: Medium priority group. */ |
| 130 | PANTHOR_CSG_PRIORITY_MEDIUM, |
| 131 | |
| 132 | /** @PANTHOR_CSG_PRIORITY_HIGH: High priority group. */ |
| 133 | PANTHOR_CSG_PRIORITY_HIGH, |
| 134 | |
| 135 | /** |
| 136 | * @PANTHOR_CSG_PRIORITY_RT: Real-time priority group. |
| 137 | * |
| 138 | * Real-time priority allows one to preempt scheduling of other |
| 139 | * non-real-time groups. When such a group becomes executable, |
| 140 | * it will evict the group with the lowest non-rt priority if |
| 141 | * there's no free group slot available. |
| 142 | */ |
| 143 | PANTHOR_CSG_PRIORITY_RT, |
| 144 | |
| 145 | /** @PANTHOR_CSG_PRIORITY_COUNT: Number of priority levels. */ |
| 146 | PANTHOR_CSG_PRIORITY_COUNT, |
| 147 | }; |
| 148 | |
| 149 | /** |
| 150 | * struct panthor_scheduler - Object used to manage the scheduler |
| 151 | */ |
| 152 | struct panthor_scheduler { |
| 153 | /** @ptdev: Device. */ |
| 154 | struct panthor_device *ptdev; |
| 155 | |
| 156 | /** |
| 157 | * @wq: Workqueue used by our internal scheduler logic and |
| 158 | * drm_gpu_scheduler. |
| 159 | * |
| 160 | * Used for the scheduler tick, group updates, or other kinds of FW
| 161 | * event processing that can't be handled in the threaded interrupt |
| 162 | * path. Also passed to the drm_gpu_scheduler instances embedded |
| 163 | * in panthor_queue. |
| 164 | */ |
| 165 | struct workqueue_struct *wq; |
| 166 | |
| 167 | /** |
| 168 | * @heap_alloc_wq: Workqueue used to schedule tiler_oom works. |
| 169 | * |
| 170 | * We have a queue dedicated to heap chunk allocation work items to avoid
| 171 | * blocking the rest of the scheduler if the allocation tries to |
| 172 | * reclaim memory. |
| 173 | */ |
| 174 | struct workqueue_struct *heap_alloc_wq; |
| 175 | |
| 176 | /** @tick_work: Work executed on a scheduling tick. */ |
| 177 | struct delayed_work tick_work; |
| 178 | |
| 179 | /** |
| 180 | * @sync_upd_work: Work used to process synchronization object updates. |
| 181 | * |
| 182 | * We use this work to unblock queues/groups that were waiting on a |
| 183 | * synchronization object. |
| 184 | */ |
| 185 | struct work_struct sync_upd_work; |
| 186 | |
| 187 | /** |
| 188 | * @fw_events_work: Work used to process FW events outside the interrupt path. |
| 189 | * |
| 190 | * Even if the interrupt is threaded, we need any event processing |
| 191 | * that requires taking the panthor_scheduler::lock to be processed
| 192 | * outside the interrupt path so we don't block the tick logic when |
| 193 | * it calls panthor_fw_{csg,wait}_wait_acks(). Since most of the |
| 194 | * event processing requires taking this lock, we just delegate all |
| 195 | * FW event processing to the scheduler workqueue. |
| 196 | */ |
| 197 | struct work_struct fw_events_work; |
| 198 | |
| 199 | /** |
| 200 | * @fw_events: Bitmask encoding pending FW events. |
| 201 | */ |
| 202 | atomic_t fw_events; |
| 203 | |
| 204 | /** |
| 205 | * @resched_target: When the next tick should occur. |
| 206 | * |
| 207 | * Expressed in jiffies. |
| 208 | */ |
| 209 | u64 resched_target; |
| 210 | |
| 211 | /** |
| 212 | * @last_tick: When the last tick occurred. |
| 213 | * |
| 214 | * Expressed in jiffies. |
| 215 | */ |
| 216 | u64 last_tick; |
| 217 | |
| 218 | /** @tick_period: Tick period in jiffies. */ |
| 219 | u64 tick_period; |
| 220 | |
| 221 | /** |
| 222 | * @lock: Lock protecting access to all the scheduler fields. |
| 223 | * |
| 224 | * Should be taken in the tick work, the irq handler, and anywhere the @groups |
| 225 | * fields are touched. |
| 226 | */ |
| 227 | struct mutex lock; |
| 228 | |
| 229 | /** @groups: Various lists used to classify groups. */ |
| 230 | struct { |
| 231 | /** |
| 232 | * @runnable: Runnable group lists. |
| 233 | * |
| 234 | * When a group has queues that want to execute something, |
| 235 | * its panthor_group::run_node should be inserted here. |
| 236 | * |
| 237 | * One list per-priority. |
| 238 | */ |
| 239 | struct list_head runnable[PANTHOR_CSG_PRIORITY_COUNT]; |
| 240 | |
| 241 | /** |
| 242 | * @idle: Idle group lists. |
| 243 | * |
| 244 | * When all queues of a group are idle (either because they |
| 245 | * have nothing to execute, or because they are blocked), the |
| 246 | * panthor_group::run_node field should be inserted here. |
| 247 | * |
| 248 | * One list per-priority. |
| 249 | */ |
| 250 | struct list_head idle[PANTHOR_CSG_PRIORITY_COUNT]; |
| 251 | |
| 252 | /** |
| 253 | * @waiting: List of groups whose queues are blocked on a |
| 254 | * synchronization object. |
| 255 | * |
| 256 | * Insert panthor_group::wait_node here when a group is waiting |
| 257 | * for synchronization objects to be signaled. |
| 258 | * |
| 259 | * This list is evaluated in the @sync_upd_work work. |
| 260 | */ |
| 261 | struct list_head waiting; |
| 262 | } groups; |
| 263 | |
| 264 | /** |
| 265 | * @csg_slots: FW command stream group slots. |
| 266 | */ |
| 267 | struct panthor_csg_slot csg_slots[MAX_CSGS]; |
| 268 | |
| 269 | /** @csg_slot_count: Number of command stream group slots exposed by the FW. */ |
| 270 | u32 csg_slot_count; |
| 271 | |
| 272 | /** @cs_slot_count: Number of command stream slots per group slot exposed by the FW. */
| 273 | u32 cs_slot_count; |
| 274 | |
| 275 | /** @as_slot_count: Number of address space slots supported by the MMU. */ |
| 276 | u32 as_slot_count; |
| 277 | |
| 278 | /** @used_csg_slot_count: Number of command stream group slots currently used. */
| 279 | u32 used_csg_slot_count; |
| 280 | |
| 281 | /** @sb_slot_count: Number of scoreboard slots. */ |
| 282 | u32 sb_slot_count; |
| 283 | |
| 284 | /** |
| 285 | * @might_have_idle_groups: True if an active group might have become idle. |
| 286 | * |
| 287 | * This will force a tick, so other runnable groups can be scheduled if one |
| 288 | * or more active groups became idle. |
| 289 | */ |
| 290 | bool might_have_idle_groups; |
| 291 | |
| 292 | /** @pm: Power management related fields. */ |
| 293 | struct { |
| 294 | /** @has_ref: True if the scheduler owns a runtime PM reference. */ |
| 295 | bool has_ref; |
| 296 | } pm; |
| 297 | |
| 298 | /** @reset: Reset related fields. */ |
| 299 | struct { |
| 300 | /** @lock: Lock protecting the other reset fields. */ |
| 301 | struct mutex lock; |
| 302 | |
| 303 | /** |
| 304 | * @in_progress: True if a reset is in progress. |
| 305 | * |
| 306 | * Set to true in panthor_sched_pre_reset() and back to false in |
| 307 | * panthor_sched_post_reset(). |
| 308 | */ |
| 309 | atomic_t in_progress; |
| 310 | |
| 311 | /** |
| 312 | * @stopped_groups: List containing all groups that were stopped |
| 313 | * before a reset. |
| 314 | * |
| 315 | * Insert panthor_group::run_node in the pre_reset path. |
| 316 | */ |
| 317 | struct list_head stopped_groups; |
| 318 | } reset; |
| 319 | }; |
| 320 | |
| 321 | /** |
| 322 | * struct panthor_syncobj_32b - 32-bit FW synchronization object |
| 323 | */ |
| 324 | struct panthor_syncobj_32b { |
| 325 | /** @seqno: Sequence number. */ |
| 326 | u32 seqno; |
| 327 | |
| 328 | /** |
| 329 | * @status: Status. |
| 330 | * |
| 331 | * Not zero on failure. |
| 332 | */ |
| 333 | u32 status; |
| 334 | }; |
| 335 | |
| 336 | /** |
| 337 | * struct panthor_syncobj_64b - 64-bit FW synchronization object |
| 338 | */ |
| 339 | struct panthor_syncobj_64b { |
| 340 | /** @seqno: Sequence number. */ |
| 341 | u64 seqno; |
| 342 | |
| 343 | /** |
| 344 | * @status: Status. |
| 345 | * |
| 346 | * Not zero on failure. |
| 347 | */ |
| 348 | u32 status; |
| 349 | |
| 350 | /** @pad: MBZ. */ |
| 351 | u32 pad; |
| 352 | }; |
| 353 | |
| 354 | /** |
| 355 | * struct panthor_queue - Execution queue |
| 356 | */ |
| 357 | struct panthor_queue { |
| 358 | /** @scheduler: DRM scheduler used for this queue. */ |
| 359 | struct drm_gpu_scheduler scheduler; |
| 360 | |
| 361 | /** @entity: DRM scheduling entity used for this queue. */ |
| 362 | struct drm_sched_entity entity; |
| 363 | |
| 364 | /** @name: DRM scheduler name for this queue. */ |
| 365 | char *name; |
| 366 | |
| 367 | /** @timeout: Queue timeout related fields. */ |
| 368 | struct { |
| 369 | /** @timeout.work: Work executed when a queue timeout occurs. */ |
| 370 | struct delayed_work work; |
| 371 | |
| 372 | /** |
| 373 | * @timeout.remaining: Time remaining before a queue timeout. |
| 374 | * |
| 375 | * When the timer is running, this value is set to MAX_SCHEDULE_TIMEOUT. |
| 376 | * When the timer is suspended, it's set to the time remaining when the |
| 377 | * timer was suspended. |
| 378 | */ |
| 379 | unsigned long remaining; |
| 380 | } timeout; |
| 381 | |
| 382 | /** |
| 383 | * @doorbell_id: Doorbell assigned to this queue. |
| 384 | * |
| 385 | * Right now, all groups share the same doorbell, and the doorbell ID |
| 386 | * is assigned to group_slot + 1 when the group is assigned a slot. But |
| 387 | * we might decide to provide fine-grained doorbell assignment at some
| 388 | * point, so we don't have to wake up all queues in a group every time one
| 389 | * of them is updated. |
| 390 | */ |
| 391 | u8 doorbell_id; |
| 392 | |
| 393 | /** |
| 394 | * @priority: Priority of the queue inside the group. |
| 395 | * |
| 396 | * Must be less than 16 (Only 4 bits available). |
| 397 | */ |
| 398 | u8 priority; |
| 399 | #define CSF_MAX_QUEUE_PRIO GENMASK(3, 0) |
| 400 | |
| 401 | /** @ringbuf: Command stream ring-buffer. */ |
| 402 | struct panthor_kernel_bo *ringbuf; |
| 403 | |
| 404 | /** @iface: Firmware interface. */ |
| 405 | struct { |
| 406 | /** @mem: FW memory allocated for this interface. */ |
| 407 | struct panthor_kernel_bo *mem; |
| 408 | |
| 409 | /** @input: Input interface. */ |
| 410 | struct panthor_fw_ringbuf_input_iface *input; |
| 411 | |
| 412 | /** @output: Output interface. */ |
| 413 | const struct panthor_fw_ringbuf_output_iface *output; |
| 414 | |
| 415 | /** @input_fw_va: FW virtual address of the input interface buffer. */ |
| 416 | u32 input_fw_va; |
| 417 | |
| 418 | /** @output_fw_va: FW virtual address of the output interface buffer. */ |
| 419 | u32 output_fw_va; |
| 420 | } iface; |
| 421 | |
| 422 | /** |
| 423 | * @syncwait: Stores information about the synchronization object this |
| 424 | * queue is waiting on. |
| 425 | */ |
| 426 | struct { |
| 427 | /** @gpu_va: GPU address of the synchronization object. */ |
| 428 | u64 gpu_va; |
| 429 | |
| 430 | /** @ref: Reference value to compare against. */ |
| 431 | u64 ref; |
| 432 | |
| 433 | /** @gt: True if this is a greater-than test. */ |
| 434 | bool gt; |
| 435 | |
| 436 | /** @sync64: True if this is a 64-bit sync object. */ |
| 437 | bool sync64; |
| 438 | |
| 439 | /** @obj: Buffer object holding the synchronization object. */
| 440 | struct drm_gem_object *obj; |
| 441 | |
| 442 | /** @offset: Offset of the synchronization object inside @obj. */
| 443 | u64 offset; |
| 444 | |
| 445 | /** |
| 446 | * @kmap: Kernel mapping of the buffer object holding the |
| 447 | * synchronization object. |
| 448 | */ |
| 449 | void *kmap; |
| 450 | } syncwait; |
| 451 | |
| 452 | /** @fence_ctx: Fence context fields. */ |
| 453 | struct { |
| 454 | /** @lock: Used to protect access to all fences allocated by this context. */ |
| 455 | spinlock_t lock; |
| 456 | |
| 457 | /** |
| 458 | * @id: Fence context ID. |
| 459 | * |
| 460 | * Allocated with dma_fence_context_alloc(). |
| 461 | */ |
| 462 | u64 id; |
| 463 | |
| 464 | /** @seqno: Sequence number of the last initialized fence. */ |
| 465 | atomic64_t seqno; |
| 466 | |
| 467 | /** |
| 468 | * @last_fence: Fence of the last submitted job. |
| 469 | * |
| 470 | * We return this fence when we get an empty command stream. |
| 471 | * This way, we are guaranteed that all earlier jobs have completed |
| 472 | * when drm_sched_job::s_fence::finished signals, without having to feed
| 473 | * the CS ring buffer with a dummy job that only signals the fence. |
| 474 | */ |
| 475 | struct dma_fence *last_fence; |
| 476 | |
| 477 | /** |
| 478 | * @in_flight_jobs: List containing all in-flight jobs. |
| 479 | * |
| 480 | * Used to keep track of and signal panthor_job::done_fence when the
| 481 | * synchronization object attached to the queue is signaled. |
| 482 | */ |
| 483 | struct list_head in_flight_jobs; |
| 484 | } fence_ctx; |
| 485 | |
| 486 | /** @profiling: Job profiling data slots and access information. */ |
| 487 | struct { |
| 488 | /** @slots: Kernel BO holding the slots. */ |
| 489 | struct panthor_kernel_bo *slots; |
| 490 | |
| 491 | /** @slot_count: Number of jobs the ring buffer can hold at once. */
| 492 | u32 slot_count; |
| 493 | |
| 494 | /** @seqno: Index of the next available profiling information slot. */ |
| 495 | u32 seqno; |
| 496 | } profiling; |
| 497 | }; |
| 498 | |
| 499 | /** |
| 500 | * enum panthor_group_state - Scheduling group state. |
| 501 | */ |
| 502 | enum panthor_group_state { |
| 503 | /** @PANTHOR_CS_GROUP_CREATED: Group was created, but not scheduled yet. */ |
| 504 | PANTHOR_CS_GROUP_CREATED, |
| 505 | |
| 506 | /** @PANTHOR_CS_GROUP_ACTIVE: Group is currently scheduled. */ |
| 507 | PANTHOR_CS_GROUP_ACTIVE, |
| 508 | |
| 509 | /** |
| 510 | * @PANTHOR_CS_GROUP_SUSPENDED: Group was scheduled at least once, but is |
| 511 | * inactive/suspended right now. |
| 512 | */ |
| 513 | PANTHOR_CS_GROUP_SUSPENDED, |
| 514 | |
| 515 | /** |
| 516 | * @PANTHOR_CS_GROUP_TERMINATED: Group was terminated. |
| 517 | * |
| 518 | * Can no longer be scheduled. The only allowed action is a destruction. |
| 519 | */ |
| 520 | PANTHOR_CS_GROUP_TERMINATED, |
| 521 | |
| 522 | /** |
| 523 | * @PANTHOR_CS_GROUP_UNKNOWN_STATE: Group is in an unknown state.
| 524 | * |
| 525 | * The FW returned an inconsistent state. The group is flagged unusable |
| 526 | * and can no longer be scheduled. The only allowed action is a |
| 527 | * destruction. |
| 528 | * |
| 529 | * When that happens, we also schedule a FW reset, to start from a fresh |
| 530 | * state. |
| 531 | */ |
| 532 | PANTHOR_CS_GROUP_UNKNOWN_STATE, |
| 533 | }; |
| 534 | |
| 535 | /** |
| 536 | * struct panthor_group - Scheduling group object |
| 537 | */ |
| 538 | struct panthor_group { |
| 539 | /** @refcount: Reference count */ |
| 540 | struct kref refcount; |
| 541 | |
| 542 | /** @ptdev: Device. */ |
| 543 | struct panthor_device *ptdev; |
| 544 | |
| 545 | /** @vm: VM bound to the group. */ |
| 546 | struct panthor_vm *vm; |
| 547 | |
| 548 | /** @compute_core_mask: Mask of shader cores that can be used for compute jobs. */ |
| 549 | u64 compute_core_mask; |
| 550 | |
| 551 | /** @fragment_core_mask: Mask of shader cores that can be used for fragment jobs. */ |
| 552 | u64 fragment_core_mask; |
| 553 | |
| 554 | /** @tiler_core_mask: Mask of tiler cores that can be used for tiler jobs. */ |
| 555 | u64 tiler_core_mask; |
| 556 | |
| 557 | /** @max_compute_cores: Maximum number of shader cores used for compute jobs. */ |
| 558 | u8 max_compute_cores; |
| 559 | |
| 560 | /** @max_fragment_cores: Maximum number of shader cores used for fragment jobs. */ |
| 561 | u8 max_fragment_cores; |
| 562 | |
| 563 | /** @max_tiler_cores: Maximum number of tiler cores used for tiler jobs. */ |
| 564 | u8 max_tiler_cores; |
| 565 | |
| 566 | /** @priority: Group priority (see enum panthor_csg_priority). */
| 567 | u8 priority; |
| 568 | |
| 569 | /** @blocked_queues: Bitmask reflecting the blocked queues. */ |
| 570 | u32 blocked_queues; |
| 571 | |
| 572 | /** @idle_queues: Bitmask reflecting the idle queues. */ |
| 573 | u32 idle_queues; |
| 574 | |
| 575 | /** @fatal_lock: Lock used to protect access to fatal fields. */ |
| 576 | spinlock_t fatal_lock; |
| 577 | |
| 578 | /** @fatal_queues: Bitmask reflecting the queues that hit a fatal exception. */ |
| 579 | u32 fatal_queues; |
| 580 | |
| 581 | /** @tiler_oom: Mask of queues that have a tiler OOM event to process. */ |
| 582 | atomic_t tiler_oom; |
| 583 | |
| 584 | /** @queue_count: Number of queues in this group. */ |
| 585 | u32 queue_count; |
| 586 | |
| 587 | /** @queues: Queues owned by this group. */ |
| 588 | struct panthor_queue *queues[MAX_CS_PER_CSG]; |
| 589 | |
| 590 | /** |
| 591 | * @csg_id: ID of the FW group slot. |
| 592 | * |
| 593 | * -1 when the group is not scheduled/active. |
| 594 | */ |
| 595 | int csg_id; |
| 596 | |
| 597 | /** |
| 598 | * @destroyed: True when the group has been destroyed. |
| 599 | * |
| 600 | * If a group is destroyed it becomes useless: no further jobs can be submitted |
| 601 | * to its queues. We simply wait for all references to be dropped so we can |
| 602 | * release the group object. |
| 603 | */ |
| 604 | bool destroyed; |
| 605 | |
| 606 | /** |
| 607 | * @timedout: True when a timeout occurred on any of the queues owned by |
| 608 | * this group. |
| 609 | * |
| 610 | * Timeouts can be reported by drm_sched or by the FW. If a reset is required, |
| 611 | * and the group can't be suspended, this also leads to a timeout. In any case, |
| 612 | * any timeout situation is unrecoverable, and the group becomes useless. We |
| 613 | * simply wait for all references to be dropped so we can release the group |
| 614 | * object. |
| 615 | */ |
| 616 | bool timedout; |
| 617 | |
| 618 | /** |
| 619 | * @innocent: True when the group becomes unusable because the group suspension |
| 620 | * failed during a reset. |
| 621 | * |
| 622 | * Sometimes the FW was put in a bad state by other groups, causing the group |
| 623 | * suspension done in the reset path to fail. In that case, we consider the
| 624 | * group innocent. |
| 625 | */ |
| 626 | bool innocent; |
| 627 | |
| 628 | /** |
| 629 | * @syncobjs: Pool of per-queue synchronization objects. |
| 630 | * |
| 631 | * One sync object per queue. The position of the sync object is |
| 632 | * determined by the queue index. |
| 633 | */ |
| 634 | struct panthor_kernel_bo *syncobjs; |
| 635 | |
| 636 | /** @fdinfo: Per-file info exposed through /proc/<process>/fdinfo */ |
| 637 | struct { |
| 638 | /** @data: Total sampled values for jobs in queues from this group. */ |
| 639 | struct panthor_gpu_usage data; |
| 640 | |
| 641 | /** |
| 642 | * @fdinfo.lock: Spinlock to govern concurrent access from drm file's fdinfo |
| 643 | * callback and job post-completion processing function |
| 644 | */ |
| 645 | spinlock_t lock; |
| 646 | |
| 647 | /** @fdinfo.kbo_sizes: Aggregate size of private kernel BO's held by the group. */ |
| 648 | size_t kbo_sizes; |
| 649 | } fdinfo; |
| 650 | |
| 651 | /** @task_info: Info of current->group_leader that created the group. */ |
| 652 | struct { |
| 653 | /** @task_info.pid: pid of current->group_leader */ |
| 654 | pid_t pid; |
| 655 | |
| 656 | /** @task_info.comm: comm of current->group_leader */ |
| 657 | char comm[TASK_COMM_LEN]; |
| 658 | } task_info; |
| 659 | |
| 660 | /** @state: Group state. */ |
| 661 | enum panthor_group_state state; |
| 662 | |
| 663 | /** |
| 664 | * @suspend_buf: Suspend buffer. |
| 665 | * |
| 666 | * Stores the state of the group and its queues when a group is suspended. |
| 667 | * Used at resume time to restore the group in its previous state. |
| 668 | * |
| 669 | * The size of the suspend buffer is exposed through the FW interface. |
| 670 | */ |
| 671 | struct panthor_kernel_bo *suspend_buf; |
| 672 | |
| 673 | /** |
| 674 | * @protm_suspend_buf: Protection mode suspend buffer. |
| 675 | * |
| 676 | * Stores the state of the group and its queues when a group that's in |
| 677 | * protection mode is suspended. |
| 678 | * |
| 679 | * Used at resume time to restore the group in its previous state. |
| 680 | * |
| 681 | * The size of the protection mode suspend buffer is exposed through the |
| 682 | * FW interface. |
| 683 | */ |
| 684 | struct panthor_kernel_bo *protm_suspend_buf; |
| 685 | |
| 686 | /** @sync_upd_work: Work used to check/signal job fences. */ |
| 687 | struct work_struct sync_upd_work; |
| 688 | |
| 689 | /** @tiler_oom_work: Work used to process tiler OOM events happening on this group. */ |
| 690 | struct work_struct tiler_oom_work; |
| 691 | |
| 692 | /** @term_work: Work used to finish the group termination procedure. */ |
| 693 | struct work_struct term_work; |
| 694 | |
| 695 | /** |
| 696 | * @release_work: Work used to release group resources. |
| 697 | * |
| 698 | * We need to postpone the group release to avoid a deadlock when |
| 699 | * the last ref is released in the tick work. |
| 700 | */ |
| 701 | struct work_struct release_work; |
| 702 | |
| 703 | /** |
| 704 | * @run_node: Node used to insert the group in the |
| 705 | * panthor_group::groups::{runnable,idle} and |
| 706 | * panthor_group::reset.stopped_groups lists. |
| 707 | */ |
| 708 | struct list_head run_node; |
| 709 | |
| 710 | /** |
| 711 | * @wait_node: Node used to insert the group in the |
| 712 | * panthor_group::groups::waiting list. |
| 713 | */ |
| 714 | struct list_head wait_node; |
| 715 | }; |
| 716 | |
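|  | /* Per-job profiling sample: cycle counter and timestamp values captured
|  | * right before and right after the job executes on the command stream.
|  | */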
| 717 | struct panthor_job_profiling_data { |
| 718 | struct { |
| 719 | u64 before; |
| 720 | u64 after; |
| 721 | } cycles; |
| 722 | |
| 723 | struct { |
| 724 | u64 before; |
| 725 | u64 after; |
| 726 | } time; |
| 727 | }; |
| 728 | |
| 729 | /** |
| 730 | * group_queue_work() - Queue a group work |
| 731 | * @group: Group to queue the work for. |
| 732 | * @wname: Work name. |
| 733 | * |
| 734 | * Grabs a ref and queues a work item on the scheduler workqueue. If
| 735 | * the work was already queued, we release the reference we grabbed. |
| 736 | * |
| 737 | * Work callbacks must release the reference we grabbed here. |
| 738 | */ |
| 739 | #define group_queue_work(group, wname) \ |
| 740 | do { \ |
| 741 | group_get(group); \ |
| 742 | if (!queue_work((group)->ptdev->scheduler->wq, &(group)->wname ## _work)) \ |
| 743 | group_put(group); \ |
| 744 | } while (0) |
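|  | /*
|  | * For instance, group_queue_work(group, term) grabs a group ref and queues
|  | * &group->term_work on the scheduler workqueue; the term work handler is
|  | * then responsible for calling group_put() once it is done.
|  | */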
| 745 | |
| 746 | /** |
| 747 | * sched_queue_work() - Queue a scheduler work. |
| 748 | * @sched: Scheduler object. |
| 749 | * @wname: Work name. |
| 750 | * |
| 751 | * Conditionally queues a scheduler work if no reset is pending/in-progress. |
| 752 | */ |
| 753 | #define sched_queue_work(sched, wname) \ |
| 754 | do { \ |
| 755 | if (!atomic_read(&(sched)->reset.in_progress) && \ |
| 756 | !panthor_device_reset_is_pending((sched)->ptdev)) \ |
| 757 | queue_work((sched)->wq, &(sched)->wname ## _work); \ |
| 758 | } while (0) |
| 759 | |
| 760 | /** |
| 761 | * sched_queue_delayed_work() - Queue a scheduler delayed work. |
| 762 | * @sched: Scheduler object. |
| 763 | * @wname: Work name. |
| 764 | * @delay: Work delay in jiffies. |
| 765 | * |
| 766 | * Conditionally queues a scheduler delayed work if no reset is |
| 767 | * pending/in-progress. |
| 768 | */ |
| 769 | #define sched_queue_delayed_work(sched, wname, delay) \ |
| 770 | do { \ |
| 771 | if (!atomic_read(&sched->reset.in_progress) && \ |
| 772 | !panthor_device_reset_is_pending((sched)->ptdev)) \ |
| 773 | mod_delayed_work((sched)->wq, &(sched)->wname ## _work, delay); \ |
| 774 | } while (0) |
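|  | /*
|  | * For instance, sched_queue_delayed_work(sched, tick, 0) (see
|  | * cs_slot_process_fatal_event_locked() below) immediately re-arms the
|  | * scheduler tick work, unless a reset is pending or in progress.
|  | */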
| 775 | |
| 776 | /* |
| 777 | * We currently set the maximum number of groups per file to an arbitrarily
| 778 | * low value, but this can be updated if we need more.
| 779 | */ |
| 780 | #define MAX_GROUPS_PER_POOL 128 |
| 781 | |
| 782 | /* |
| 783 | * Mark added to a group pool XArray entry to indicate that the group has
| 784 | * been fully initialized and can be accessed elsewhere in the driver code. |
| 785 | */ |
| 786 | #define GROUP_REGISTERED XA_MARK_1 |
| 787 | |
| 788 | /** |
| 789 | * struct panthor_group_pool - Group pool |
| 790 | * |
| 791 | * Each file gets assigned a group pool.
| 792 | */ |
| 793 | struct panthor_group_pool { |
| 794 | /** @xa: Xarray used to manage group handles. */ |
| 795 | struct xarray xa; |
| 796 | }; |
| 797 | |
| 798 | /** |
| 799 | * struct panthor_job - Used to manage a GPU job
| 800 | */ |
| 801 | struct panthor_job { |
| 802 | /** @base: Inherit from drm_sched_job. */ |
| 803 | struct drm_sched_job base; |
| 804 | |
| 805 | /** @refcount: Reference count. */ |
| 806 | struct kref refcount; |
| 807 | |
| 808 | /** @group: Group of the queue this job will be pushed to. */ |
| 809 | struct panthor_group *group; |
| 810 | |
| 811 | /** @queue_idx: Index of the queue inside @group. */ |
| 812 | u32 queue_idx; |
| 813 | |
| 814 | /** @call_info: Information about the userspace command stream call. */ |
| 815 | struct { |
| 816 | /** @start: GPU address of the userspace command stream. */ |
| 817 | u64 start; |
| 818 | |
| 819 | /** @size: Size of the userspace command stream. */ |
| 820 | u32 size; |
| 821 | |
| 822 | /** |
| 823 | * @latest_flush: Flush ID at the time the userspace command |
| 824 | * stream was built. |
| 825 | * |
| 826 | * Needed for the flush reduction mechanism. |
| 827 | */ |
| 828 | u32 latest_flush; |
| 829 | } call_info; |
| 830 | |
| 831 | /** @ringbuf: Position of this job in the ring buffer. */
| 832 | struct { |
| 833 | /** @start: Start offset. */ |
| 834 | u64 start; |
| 835 | |
| 836 | /** @end: End offset. */ |
| 837 | u64 end; |
| 838 | } ringbuf; |
| 839 | |
| 840 | /** |
| 841 | * @node: Used to insert the job in the panthor_queue::fence_ctx::in_flight_jobs |
| 842 | * list. |
| 843 | */ |
| 844 | struct list_head node; |
| 845 | |
| 846 | /** @done_fence: Fence signaled when the job is finished or cancelled. */ |
| 847 | struct dma_fence *done_fence; |
| 848 | |
| 849 | /** @profiling: Job profiling information. */ |
| 850 | struct { |
| 851 | /** @mask: Current device job profiling enablement bitmask. */ |
| 852 | u32 mask; |
| 853 | |
| 854 | /** @slot: Job index in the profiling slots BO. */ |
| 855 | u32 slot; |
| 856 | } profiling; |
| 857 | }; |
| 858 | |
| 859 | static void |
| 860 | panthor_queue_put_syncwait_obj(struct panthor_queue *queue) |
| 861 | { |
| 862 | if (queue->syncwait.kmap) { |
| 863 | struct iosys_map map = IOSYS_MAP_INIT_VADDR(queue->syncwait.kmap); |
| 864 | |
| 865 | drm_gem_vunmap(queue->syncwait.obj, &map);
| 866 | queue->syncwait.kmap = NULL;
| 867 | }
| 868 |
| 869 | drm_gem_object_put(queue->syncwait.obj);
| 870 | queue->syncwait.obj = NULL;
| 871 | } |
| 872 | |
| 873 | static void * |
| 874 | panthor_queue_get_syncwait_obj(struct panthor_group *group, struct panthor_queue *queue) |
| 875 | { |
| 876 | struct panthor_device *ptdev = group->ptdev; |
| 877 | struct panthor_gem_object *bo; |
| 878 | struct iosys_map map; |
| 879 | int ret; |
| 880 | |
| 881 | if (queue->syncwait.kmap) |
| 882 | return queue->syncwait.kmap + queue->syncwait.offset; |
| 883 | |
| 884 | bo = panthor_vm_get_bo_for_va(group->vm,
| 885 | queue->syncwait.gpu_va,
| 886 | &queue->syncwait.offset);
| 887 | if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(bo)))
| 888 | goto err_put_syncwait_obj;
| 889 |
| 890 | queue->syncwait.obj = &bo->base.base;
| 891 | ret = drm_gem_vmap(queue->syncwait.obj, &map);
| 892 | if (drm_WARN_ON(&ptdev->base, ret)) |
| 893 | goto err_put_syncwait_obj; |
| 894 | |
| 895 | queue->syncwait.kmap = map.vaddr; |
| 896 | if (drm_WARN_ON(&ptdev->base, !queue->syncwait.kmap)) |
| 897 | goto err_put_syncwait_obj; |
| 898 | |
| 899 | return queue->syncwait.kmap + queue->syncwait.offset; |
| 900 | |
| 901 | err_put_syncwait_obj: |
| 902 | panthor_queue_put_syncwait_obj(queue); |
| 903 | return NULL; |
| 904 | } |
| 905 | |
| 906 | static void group_free_queue(struct panthor_group *group, struct panthor_queue *queue) |
| 907 | { |
| 908 | if (IS_ERR_OR_NULL(queue))
| 909 | return; |
| 910 | |
| 911 | /* This should have been disabled before that point. */ |
| 912 | drm_WARN_ON(&group->ptdev->base, |
| 913 | disable_delayed_work_sync(&queue->timeout.work)); |
| 914 | |
| 915 | if (queue->entity.fence_context) |
| 916 | drm_sched_entity_destroy(&queue->entity);
| 917 |
| 918 | if (queue->scheduler.ops)
| 919 | drm_sched_fini(&queue->scheduler);
| 920 |
| 921 | kfree(queue->name);
| 922 | |
| 923 | panthor_queue_put_syncwait_obj(queue); |
| 924 | |
| 925 | panthor_kernel_bo_destroy(queue->ringbuf);
| 926 | panthor_kernel_bo_destroy(queue->iface.mem);
| 927 | panthor_kernel_bo_destroy(queue->profiling.slots);
| 928 |
| 929 | /* Release the last_fence we were holding, if any. */
| 930 | dma_fence_put(queue->fence_ctx.last_fence);
| 931 |
| 932 | kfree(queue);
| 933 | } |
| 934 | |
| 935 | static void group_release_work(struct work_struct *work) |
| 936 | { |
| 937 | struct panthor_group *group = container_of(work, |
| 938 | struct panthor_group, |
| 939 | release_work); |
| 940 | u32 i; |
| 941 | |
| 942 | for (i = 0; i < group->queue_count; i++) |
| 943 | group_free_queue(group, group->queues[i]);
| 944 | |
| 945 | panthor_kernel_bo_destroy(group->suspend_buf);
| 946 | panthor_kernel_bo_destroy(group->protm_suspend_buf);
| 947 | panthor_kernel_bo_destroy(group->syncobjs);
| 948 |
| 949 | panthor_vm_put(group->vm);
| 950 | kfree(group);
| 951 | } |
| 952 | |
| 953 | static void group_release(struct kref *kref) |
| 954 | { |
| 955 | struct panthor_group *group = container_of(kref, |
| 956 | struct panthor_group, |
| 957 | refcount); |
| 958 | struct panthor_device *ptdev = group->ptdev; |
| 959 | |
| 960 | drm_WARN_ON(&ptdev->base, group->csg_id >= 0); |
| 961 | drm_WARN_ON(&ptdev->base, !list_empty(&group->run_node)); |
| 962 | drm_WARN_ON(&ptdev->base, !list_empty(&group->wait_node)); |
| 963 | |
| 964 | queue_work(panthor_cleanup_wq, &group->release_work);
| 965 | } |
| 966 | |
| 967 | static void group_put(struct panthor_group *group) |
| 968 | { |
| 969 | if (group) |
| 970 | kref_put(&group->refcount, group_release);
| 971 | } |
| 972 | |
| 973 | static struct panthor_group * |
| 974 | group_get(struct panthor_group *group) |
| 975 | { |
| 976 | if (group) |
| 977 | kref_get(&group->refcount);
| 978 | |
| 979 | return group; |
| 980 | } |
| 981 | |
| 982 | /** |
| 983 | * group_bind_locked() - Bind a group to a group slot |
| 984 | * @group: Group. |
| 985 | * @csg_id: Slot. |
| 986 | * |
| 987 | * Return: 0 on success, a negative error code otherwise. |
| 988 | */ |
| 989 | static int |
| 990 | group_bind_locked(struct panthor_group *group, u32 csg_id) |
| 991 | { |
| 992 | struct panthor_device *ptdev = group->ptdev; |
| 993 | struct panthor_csg_slot *csg_slot; |
| 994 | int ret; |
| 995 | |
| 996 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 997 | |
| 998 | if (drm_WARN_ON(&ptdev->base, group->csg_id != -1 || csg_id >= MAX_CSGS || |
| 999 | ptdev->scheduler->csg_slots[csg_id].group)) |
| 1000 | return -EINVAL; |
| 1001 | |
| 1002 | ret = panthor_vm_active(group->vm);
| 1003 | if (ret) |
| 1004 | return ret; |
| 1005 | |
| 1006 | csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1007 | group_get(group); |
| 1008 | group->csg_id = csg_id; |
| 1009 | |
| 1010 | /* Dummy doorbell allocation: doorbell is assigned to the group and |
| 1011 | * all queues use the same doorbell. |
| 1012 | * |
| 1013 | * TODO: Implement LRU-based doorbell assignment, so the most often |
| 1014 | * updated queues get their own doorbell, thus avoiding useless checks |
| 1015 | * on queues belonging to the same group that are rarely updated. |
| 1016 | */ |
| 1017 | for (u32 i = 0; i < group->queue_count; i++) |
| 1018 | group->queues[i]->doorbell_id = csg_id + 1; |
| 1019 | |
| 1020 | csg_slot->group = group; |
| 1021 | |
| 1022 | return 0; |
| 1023 | } |
| 1024 | |
| 1025 | /** |
| 1026 | * group_unbind_locked() - Unbind a group from a slot. |
| 1027 | * @group: Group to unbind. |
| 1028 | * |
| 1029 | * Return: 0 on success, a negative error code otherwise. |
| 1030 | */ |
| 1031 | static int |
| 1032 | group_unbind_locked(struct panthor_group *group) |
| 1033 | { |
| 1034 | struct panthor_device *ptdev = group->ptdev; |
| 1035 | struct panthor_csg_slot *slot; |
| 1036 | |
| 1037 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1038 | |
| 1039 | if (drm_WARN_ON(&ptdev->base, group->csg_id < 0 || group->csg_id >= MAX_CSGS)) |
| 1040 | return -EINVAL; |
| 1041 | |
| 1042 | if (drm_WARN_ON(&ptdev->base, group->state == PANTHOR_CS_GROUP_ACTIVE)) |
| 1043 | return -EINVAL; |
| 1044 | |
| 1045 | slot = &ptdev->scheduler->csg_slots[group->csg_id]; |
| 1046 | panthor_vm_idle(group->vm);
| 1047 | group->csg_id = -1; |
| 1048 | |
| 1049 | /* Tiler OOM events will be re-issued next time the group is scheduled. */ |
| 1050 | atomic_set(&group->tiler_oom, 0);
| 1051 | cancel_work(&group->tiler_oom_work);
| 1052 | |
| 1053 | for (u32 i = 0; i < group->queue_count; i++) |
| 1054 | group->queues[i]->doorbell_id = -1; |
| 1055 | |
| 1056 | slot->group = NULL; |
| 1057 | |
| 1058 | group_put(group); |
| 1059 | return 0; |
| 1060 | } |
| 1061 | |
| 1062 | static bool |
| 1063 | group_is_idle(struct panthor_group *group) |
| 1064 | { |
| 1065 | struct panthor_device *ptdev = group->ptdev; |
| 1066 | u32 inactive_queues; |
| 1067 | |
| 1068 | if (group->csg_id >= 0) |
| 1069 | return ptdev->scheduler->csg_slots[group->csg_id].idle; |
| 1070 | |
| 1071 | inactive_queues = group->idle_queues | group->blocked_queues; |
| 1072 | return hweight32(inactive_queues) == group->queue_count; |
| 1073 | } |
| 1074 | |
| 1075 | static void |
| 1076 | queue_reset_timeout_locked(struct panthor_queue *queue) |
| 1077 | { |
| 1078 | lockdep_assert_held(&queue->fence_ctx.lock); |
| 1079 | |
| 1080 | if (queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT) { |
| 1081 | mod_delayed_work(queue->scheduler.timeout_wq,
| 1082 | &queue->timeout.work,
| 1083 | msecs_to_jiffies(JOB_TIMEOUT_MS));
| 1084 | } |
| 1085 | } |
| 1086 | |
| 1087 | static bool |
| 1088 | group_can_run(struct panthor_group *group) |
| 1089 | { |
| 1090 | return group->state != PANTHOR_CS_GROUP_TERMINATED && |
| 1091 | group->state != PANTHOR_CS_GROUP_UNKNOWN_STATE && |
| 1092 | !group->destroyed && group->fatal_queues == 0 && |
| 1093 | !group->timedout; |
| 1094 | } |
| 1095 | |
| 1096 | static bool |
| 1097 | queue_timeout_is_suspended(struct panthor_queue *queue) |
| 1098 | { |
| 1099 | /* When running, the remaining time is set to MAX_SCHEDULE_TIMEOUT. */ |
| 1100 | return queue->timeout.remaining != MAX_SCHEDULE_TIMEOUT; |
| 1101 | } |
| 1102 | |
| 1103 | static void |
| 1104 | queue_suspend_timeout_locked(struct panthor_queue *queue) |
| 1105 | { |
| 1106 | unsigned long qtimeout, now; |
| 1107 | struct panthor_group *group; |
| 1108 | struct panthor_job *job; |
| 1109 | bool timer_was_active; |
| 1110 | |
| 1111 | lockdep_assert_held(&queue->fence_ctx.lock); |
| 1112 | |
| 1113 | /* Already suspended, nothing to do. */ |
| 1114 | if (queue_timeout_is_suspended(queue)) |
| 1115 | return; |
| 1116 | |
| 1117 | job = list_first_entry_or_null(&queue->fence_ctx.in_flight_jobs, |
| 1118 | struct panthor_job, node); |
| 1119 | group = job ? job->group : NULL; |
| 1120 | |
| 1121 | /* If the queue is blocked and the group is idle, we want the timer to |
| 1122 | * keep running because the group can't be unblocked by other queues, |
| 1123 | * so it has to come from an external source, and we want to timebox |
| 1124 | * this external signalling. |
| 1125 | */ |
| 1126 | if (group && group_can_run(group) && |
| 1127 | (group->blocked_queues & BIT(job->queue_idx)) && |
| 1128 | group_is_idle(group)) |
| 1129 | return; |
| 1130 | |
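|  | /* Snapshot the pending expiry before cancelling the timer, so we can
|  | * compute how much of the timeout budget is left and re-arm the work
|  | * with that remaining time when the timeout is resumed.
|  | */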
| 1131 | now = jiffies; |
| 1132 | qtimeout = queue->timeout.work.timer.expires; |
| 1133 | |
| 1134 | /* Cancel the timer. */ |
| 1135 | timer_was_active = cancel_delayed_work(&queue->timeout.work);
| 1136 | if (!timer_was_active || !job) |
| 1137 | queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); |
| 1138 | else if (time_after(qtimeout, now)) |
| 1139 | queue->timeout.remaining = qtimeout - now; |
| 1140 | else |
| 1141 | queue->timeout.remaining = 0; |
| 1142 | |
| 1143 | if (WARN_ON_ONCE(queue->timeout.remaining > msecs_to_jiffies(JOB_TIMEOUT_MS))) |
| 1144 | queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); |
| 1145 | } |
| 1146 | |
| 1147 | static void |
| 1148 | queue_suspend_timeout(struct panthor_queue *queue) |
| 1149 | { |
| 1150 | spin_lock(&queue->fence_ctx.lock);
| 1151 | queue_suspend_timeout_locked(queue);
| 1152 | spin_unlock(&queue->fence_ctx.lock);
| 1153 | } |
| 1154 | |
| 1155 | static void |
| 1156 | queue_resume_timeout(struct panthor_queue *queue) |
| 1157 | { |
| 1158 | spin_lock(&queue->fence_ctx.lock);
| 1159 | |
| 1160 | if (queue_timeout_is_suspended(queue)) { |
| 1161 | mod_delayed_work(queue->scheduler.timeout_wq,
| 1162 | &queue->timeout.work,
| 1163 | queue->timeout.remaining);
| 1164 | |
| 1165 | queue->timeout.remaining = MAX_SCHEDULE_TIMEOUT; |
| 1166 | } |
| 1167 | |
| 1168 | spin_unlock(&queue->fence_ctx.lock);
| 1169 | } |
| 1170 | |
| 1171 | /** |
| 1172 | * cs_slot_prog_locked() - Program a queue slot |
| 1173 | * @ptdev: Device. |
| 1174 | * @csg_id: Group slot ID. |
| 1175 | * @cs_id: Queue slot ID. |
| 1176 | * |
| 1177 | * Program a queue slot with the queue information so things can start being |
| 1178 | * executed on this queue. |
| 1179 | * |
| 1180 | * The group slot must have a group bound to it already (group_bind_locked()). |
| 1181 | */ |
| 1182 | static void |
| 1183 | cs_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) |
| 1184 | { |
| 1185 | struct panthor_queue *queue = ptdev->scheduler->csg_slots[csg_id].group->queues[cs_id]; |
| 1186 | struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
| 1187 | |
| 1188 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1189 | |
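|  | /* Resynchronize the extract pointer we pass to the FW with the last
|  | * extract point the FW reported, so execution resumes from where the
|  | * FW previously stopped consuming the ring buffer.
|  | */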
| 1190 | queue->iface.input->extract = queue->iface.output->extract; |
| 1191 | drm_WARN_ON(&ptdev->base, queue->iface.input->insert < queue->iface.input->extract); |
| 1192 | |
| 1193 | cs_iface->input->ringbuf_base = panthor_kernel_bo_gpuva(queue->ringbuf);
| 1194 | cs_iface->input->ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
| 1195 | cs_iface->input->ringbuf_input = queue->iface.input_fw_va; |
| 1196 | cs_iface->input->ringbuf_output = queue->iface.output_fw_va; |
| 1197 | cs_iface->input->config = CS_CONFIG_PRIORITY(queue->priority) | |
| 1198 | CS_CONFIG_DOORBELL(queue->doorbell_id); |
| 1199 | cs_iface->input->ack_irq_mask = ~0; |
| 1200 | panthor_fw_update_reqs(cs_iface, req, |
| 1201 | CS_IDLE_SYNC_WAIT | |
| 1202 | CS_IDLE_EMPTY | |
| 1203 | CS_STATE_START | |
| 1204 | CS_EXTRACT_EVENT, |
| 1205 | CS_IDLE_SYNC_WAIT | |
| 1206 | CS_IDLE_EMPTY | |
| 1207 | CS_STATE_MASK | |
| 1208 | CS_EXTRACT_EVENT); |
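|  | /* If there are still unconsumed instructions in the ring buffer, the
|  | * queue has pending work, so re-arm its timeout.
|  | */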
| 1209 | if (queue->iface.input->insert != queue->iface.input->extract) |
| 1210 | queue_resume_timeout(queue); |
| 1211 | } |
| 1212 | |
| 1213 | /** |
| 1214 | * cs_slot_reset_locked() - Reset a queue slot |
| 1215 | * @ptdev: Device. |
| 1216 | * @csg_id: Group slot. |
| 1217 | * @cs_id: Queue slot. |
| 1218 | * |
| 1219 | * Change the queue slot state to STOP and suspend the queue timeout if |
| 1220 | * the queue is not blocked. |
| 1221 | * |
| 1222 | * The group slot must have a group bound to it (group_bind_locked()). |
| 1223 | */ |
| 1224 | static int |
| 1225 | cs_slot_reset_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) |
| 1226 | { |
| 1227 | struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
| 1228 | struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; |
| 1229 | struct panthor_queue *queue = group->queues[cs_id]; |
| 1230 | |
| 1231 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1232 | |
| 1233 | panthor_fw_update_reqs(cs_iface, req, |
| 1234 | CS_STATE_STOP, |
| 1235 | CS_STATE_MASK); |
| 1236 | |
| 1237 | queue_suspend_timeout(queue); |
| 1238 | |
| 1239 | return 0; |
| 1240 | } |
| 1241 | |
| 1242 | /** |
| 1243 | * csg_slot_sync_priority_locked() - Synchronize the group slot priority |
| 1244 | * @ptdev: Device. |
| 1245 | * @csg_id: Group slot ID. |
| 1246 | * |
| 1247 | * Group slot priority update happens asynchronously. When we receive a |
| 1248 | * %CSG_ENDPOINT_CONFIG, we know the update is effective, and can |
| 1249 | * reflect it to our panthor_csg_slot object. |
| 1250 | */ |
| 1251 | static void |
| 1252 | csg_slot_sync_priority_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1253 | { |
| 1254 | struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1255 | struct panthor_fw_csg_iface *csg_iface; |
| 1256 | u64 endpoint_req; |
| 1257 | |
| 1258 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1259 | |
| 1260 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
| 1261 | endpoint_req = panthor_fw_csg_endpoint_req_get(ptdev, csg_iface); |
| 1262 | csg_slot->priority = CSG_EP_REQ_PRIORITY_GET(endpoint_req); |
| 1263 | } |
| 1264 | |
| 1265 | /** |
| 1266 | * cs_slot_sync_queue_state_locked() - Synchronize the queue slot state
| 1267 | * @ptdev: Device. |
| 1268 | * @csg_id: Group slot. |
| 1269 | * @cs_id: Queue slot. |
| 1270 | * |
| 1271 | * Queue state is updated on group suspend or STATUS_UPDATE event. |
| 1272 | */ |
| 1273 | static void |
| 1274 | cs_slot_sync_queue_state_locked(struct panthor_device *ptdev, u32 csg_id, u32 cs_id) |
| 1275 | { |
| 1276 | struct panthor_group *group = ptdev->scheduler->csg_slots[csg_id].group; |
| 1277 | struct panthor_queue *queue = group->queues[cs_id]; |
| 1278 | struct panthor_fw_cs_iface *cs_iface = |
| 1279 | panthor_fw_get_cs_iface(group->ptdev, csg_id, cs_id);
| 1280 | |
| 1281 | u32 status_wait_cond; |
| 1282 | |
| 1283 | switch (cs_iface->output->status_blocked_reason) { |
| 1284 | case CS_STATUS_BLOCKED_REASON_UNBLOCKED: |
| 1285 | if (queue->iface.input->insert == queue->iface.output->extract && |
| 1286 | cs_iface->output->status_scoreboards == 0) |
| 1287 | group->idle_queues |= BIT(cs_id); |
| 1288 | break; |
| 1289 | |
| 1290 | case CS_STATUS_BLOCKED_REASON_SYNC_WAIT: |
| 1291 | if (list_empty(&group->wait_node)) {
| 1292 | list_move_tail(&group->wait_node,
| 1293 | &group->ptdev->scheduler->groups.waiting);
| 1294 | } |
| 1295 | |
| 1296 | /* The queue is only blocked if there's no deferred operation |
| 1297 | * pending, which can be checked through the scoreboard status. |
| 1298 | */ |
| 1299 | if (!cs_iface->output->status_scoreboards) |
| 1300 | group->blocked_queues |= BIT(cs_id); |
| 1301 | |
| 1302 | queue->syncwait.gpu_va = cs_iface->output->status_wait_sync_ptr; |
| 1303 | queue->syncwait.ref = cs_iface->output->status_wait_sync_value; |
| 1304 | status_wait_cond = cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_COND_MASK; |
| 1305 | queue->syncwait.gt = status_wait_cond == CS_STATUS_WAIT_SYNC_COND_GT; |
| 1306 | if (cs_iface->output->status_wait & CS_STATUS_WAIT_SYNC_64B) { |
| 1307 | u64 sync_val_hi = cs_iface->output->status_wait_sync_value_hi; |
| 1308 | |
| 1309 | queue->syncwait.sync64 = true; |
| 1310 | queue->syncwait.ref |= sync_val_hi << 32; |
| 1311 | } else { |
| 1312 | queue->syncwait.sync64 = false; |
| 1313 | } |
| 1314 | break; |
| 1315 | |
| 1316 | default: |
| 1317 | /* Other reasons are not blocking. Consider the queue as runnable |
| 1318 | * in those cases. |
| 1319 | */ |
| 1320 | break; |
| 1321 | } |
| 1322 | } |
| 1323 | |
| 1324 | static void |
| 1325 | csg_slot_sync_queues_state_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1326 | { |
| 1327 | struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1328 | struct panthor_group *group = csg_slot->group; |
| 1329 | u32 i; |
| 1330 | |
| 1331 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1332 | |
| 1333 | group->idle_queues = 0; |
| 1334 | group->blocked_queues = 0; |
| 1335 | |
| 1336 | for (i = 0; i < group->queue_count; i++) { |
| 1337 | if (group->queues[i]) |
| 1338 | cs_slot_sync_queue_state_locked(ptdev, csg_id, i);
| 1339 | } |
| 1340 | } |
| 1341 | |
| 1342 | static void |
| 1343 | csg_slot_sync_state_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1344 | { |
| 1345 | struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1346 | struct panthor_fw_csg_iface *csg_iface; |
| 1347 | struct panthor_group *group; |
| 1348 | enum panthor_group_state new_state, old_state; |
| 1349 | u32 csg_state; |
| 1350 | |
| 1351 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1352 | |
| 1353 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id);
| 1354 | group = csg_slot->group; |
| 1355 | |
| 1356 | if (!group) |
| 1357 | return; |
| 1358 | |
| 1359 | old_state = group->state; |
| 1360 | csg_state = csg_iface->output->ack & CSG_STATE_MASK; |
| 1361 | switch (csg_state) { |
| 1362 | case CSG_STATE_START: |
| 1363 | case CSG_STATE_RESUME: |
| 1364 | new_state = PANTHOR_CS_GROUP_ACTIVE; |
| 1365 | break; |
| 1366 | case CSG_STATE_TERMINATE: |
| 1367 | new_state = PANTHOR_CS_GROUP_TERMINATED; |
| 1368 | break; |
| 1369 | case CSG_STATE_SUSPEND: |
| 1370 | new_state = PANTHOR_CS_GROUP_SUSPENDED; |
| 1371 | break; |
| 1372 | default: |
| 1373 | /* The unknown state might be caused by a FW state corruption, |
| 1374 | * which means the group metadata can't be trusted anymore, and |
| 1375 | * the SUSPEND operation might propagate the corruption to the |
| 1376 | * suspend buffers. Flag the group state as unknown to make |
| 1377 | * sure it's unusable after that point. |
| 1378 | */ |
| 1379 | drm_err(&ptdev->base, "Invalid state on CSG %d (state=%d)",
| 1380 | csg_id, csg_state); |
| 1381 | new_state = PANTHOR_CS_GROUP_UNKNOWN_STATE; |
| 1382 | break; |
| 1383 | } |
| 1384 | |
| 1385 | if (old_state == new_state) |
| 1386 | return; |
| 1387 | |
| 1388 | /* The unknown state might be caused by a FW issue, reset the FW to |
| 1389 | * take a fresh start. |
| 1390 | */ |
| 1391 | if (new_state == PANTHOR_CS_GROUP_UNKNOWN_STATE) |
| 1392 | panthor_device_schedule_reset(ptdev); |
| 1393 | |
| 1394 | if (new_state == PANTHOR_CS_GROUP_SUSPENDED) |
| 1395 | csg_slot_sync_queues_state_locked(ptdev, csg_id); |
| 1396 | |
| 1397 | if (old_state == PANTHOR_CS_GROUP_ACTIVE) { |
| 1398 | u32 i; |
| 1399 | |
| 1400 | /* Reset the queue slots so we start from a clean |
| 1401 | * state when starting/resuming a new group on this |
| 1402 | * CSG slot. No wait needed here, and no doorbell ring
| 1403 | * either, since the CS slot will only be re-used |
| 1404 | * on the next CSG start operation. |
| 1405 | */ |
| 1406 | for (i = 0; i < group->queue_count; i++) { |
| 1407 | if (group->queues[i]) |
| 1408 | cs_slot_reset_locked(ptdev, csg_id, i);
| 1409 | } |
| 1410 | } |
| 1411 | |
| 1412 | group->state = new_state; |
| 1413 | } |
| 1414 | |
| 1415 | static int |
| 1416 | csg_slot_prog_locked(struct panthor_device *ptdev, u32 csg_id, u32 priority) |
| 1417 | { |
| 1418 | struct panthor_fw_csg_iface *csg_iface; |
| 1419 | struct panthor_csg_slot *csg_slot; |
| 1420 | struct panthor_group *group; |
| 1421 | u32 queue_mask = 0, i; |
| 1422 | u64 endpoint_req; |
| 1423 | |
| 1424 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1425 | |
| 1426 | if (priority > MAX_CSG_PRIO) |
| 1427 | return -EINVAL; |
| 1428 | |
| 1429 | if (drm_WARN_ON(&ptdev->base, csg_id >= MAX_CSGS)) |
| 1430 | return -EINVAL; |
| 1431 | |
| 1432 | csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1433 | group = csg_slot->group; |
| 1434 | if (!group || group->state == PANTHOR_CS_GROUP_ACTIVE) |
| 1435 | return 0; |
| 1436 | |
| 1437 | csg_iface = panthor_fw_get_csg_iface(group->ptdev, csg_id);
| 1438 | |
| 1439 | for (i = 0; i < group->queue_count; i++) { |
| 1440 | if (group->queues[i]) { |
| 1441 | cs_slot_prog_locked(ptdev, csg_id, i);
| 1442 | queue_mask |= BIT(i); |
| 1443 | } |
| 1444 | } |
| 1445 | |
| 1446 | csg_iface->input->allow_compute = group->compute_core_mask; |
| 1447 | csg_iface->input->allow_fragment = group->fragment_core_mask; |
| 1448 | csg_iface->input->allow_other = group->tiler_core_mask; |
| 1449 | endpoint_req = CSG_EP_REQ_COMPUTE(group->max_compute_cores) | |
| 1450 | CSG_EP_REQ_FRAGMENT(group->max_fragment_cores) | |
| 1451 | CSG_EP_REQ_TILER(group->max_tiler_cores) | |
| 1452 | CSG_EP_REQ_PRIORITY(priority); |
| 1453 | panthor_fw_csg_endpoint_req_set(ptdev, csg_iface, endpoint_req);
| 1454 |
| 1455 | csg_iface->input->config = panthor_vm_as(group->vm);
| 1456 |
| 1457 | if (group->suspend_buf)
| 1458 | csg_iface->input->suspend_buf = panthor_kernel_bo_gpuva(group->suspend_buf);
| 1459 | else |
| 1460 | csg_iface->input->suspend_buf = 0; |
| 1461 | |
| 1462 | if (group->protm_suspend_buf) { |
| 1463 | csg_iface->input->protm_suspend_buf = |
| 1464 | panthor_kernel_bo_gpuva(group->protm_suspend_buf);
| 1465 | } else { |
| 1466 | csg_iface->input->protm_suspend_buf = 0; |
| 1467 | } |
| 1468 | |
| 1469 | csg_iface->input->ack_irq_mask = ~0; |
| 1470 | panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, queue_mask); |
| 1471 | return 0; |
| 1472 | } |
| 1473 | |
| 1474 | static void |
| 1475 | cs_slot_process_fatal_event_locked(struct panthor_device *ptdev, |
| 1476 | u32 csg_id, u32 cs_id) |
| 1477 | { |
| 1478 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1479 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 1480 | struct panthor_group *group = csg_slot->group; |
| 1481 | struct panthor_fw_cs_iface *cs_iface; |
| 1482 | u32 fatal; |
| 1483 | u64 info; |
| 1484 | |
| 1485 | lockdep_assert_held(&sched->lock); |
| 1486 | |
| 1487 | cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id);
| 1488 | fatal = cs_iface->output->fatal; |
| 1489 | info = cs_iface->output->fatal_info; |
| 1490 | |
| 1491 | if (group) { |
| 1492 | drm_warn(&ptdev->base, "CS_FATAL: pid=%d, comm=%s\n",
| 1493 | group->task_info.pid, group->task_info.comm); |
| 1494 | |
| 1495 | group->fatal_queues |= BIT(cs_id); |
| 1496 | } |
| 1497 | |
| 1498 | if (CS_EXCEPTION_TYPE(fatal) == DRM_PANTHOR_EXCEPTION_CS_UNRECOVERABLE) { |
| 1499 | /* If this exception is unrecoverable, queue a reset, and make |
| 1500 | * sure we stop scheduling groups until the reset has happened. |
| 1501 | */ |
| 1502 | panthor_device_schedule_reset(ptdev); |
| 1503 | cancel_delayed_work(&sched->tick_work);
| 1504 | } else { |
| 1505 | sched_queue_delayed_work(sched, tick, 0); |
| 1506 | } |
| 1507 | |
| 1508 | drm_warn(&ptdev->base, |
| 1509 | "CSG slot %d CS slot: %d\n" |
| 1510 | "CS_FATAL.EXCEPTION_TYPE: 0x%x (%s)\n" |
| 1511 | "CS_FATAL.EXCEPTION_DATA: 0x%x\n" |
| 1512 | "CS_FATAL_INFO.EXCEPTION_DATA: 0x%llx\n" , |
| 1513 | csg_id, cs_id, |
| 1514 | (unsigned int)CS_EXCEPTION_TYPE(fatal), |
| 1515 | panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fatal)), |
| 1516 | (unsigned int)CS_EXCEPTION_DATA(fatal), |
| 1517 | info); |
| 1518 | } |
| 1519 | |
| 1520 | static void |
| 1521 | cs_slot_process_fault_event_locked(struct panthor_device *ptdev, |
| 1522 | u32 csg_id, u32 cs_id) |
| 1523 | { |
| 1524 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1525 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 1526 | struct panthor_group *group = csg_slot->group; |
| 1527 | struct panthor_queue *queue = group && cs_id < group->queue_count ? |
| 1528 | group->queues[cs_id] : NULL; |
| 1529 | struct panthor_fw_cs_iface *cs_iface; |
| 1530 | u32 fault; |
| 1531 | u64 info; |
| 1532 | |
| 1533 | lockdep_assert_held(&sched->lock); |
| 1534 | |
| 1535 | cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); |
| 1536 | fault = cs_iface->output->fault; |
| 1537 | info = cs_iface->output->fault_info; |
| 1538 | |
| 1539 | if (queue) { |
| 1540 | u64 cs_extract = queue->iface.output->extract; |
| 1541 | struct panthor_job *job; |
| 1542 | |
| 1543 | spin_lock(&queue->fence_ctx.lock); |
| 1544 | list_for_each_entry(job, &queue->fence_ctx.in_flight_jobs, node) { |
| 1545 | if (cs_extract >= job->ringbuf.end) |
| 1546 | continue; |
| 1547 | |
| 1548 | if (cs_extract < job->ringbuf.start) |
| 1549 | break; |
| 1550 | |
| 1551 | dma_fence_set_error(job->done_fence, -EINVAL); |
| 1552 | } |
| 1553 | spin_unlock(&queue->fence_ctx.lock); |
| 1554 | } |
| 1555 | |
| 1556 | if (group) { |
| 1557 | drm_warn(&ptdev->base, "CS_FAULT: pid=%d, comm=%s\n" , |
| 1558 | group->task_info.pid, group->task_info.comm); |
| 1559 | } |
| 1560 | |
| 1561 | drm_warn(&ptdev->base, |
| 1562 | "CSG slot %d CS slot: %d\n" |
| 1563 | "CS_FAULT.EXCEPTION_TYPE: 0x%x (%s)\n" |
| 1564 | "CS_FAULT.EXCEPTION_DATA: 0x%x\n" |
| 1565 | "CS_FAULT_INFO.EXCEPTION_DATA: 0x%llx\n" , |
| 1566 | csg_id, cs_id, |
| 1567 | (unsigned int)CS_EXCEPTION_TYPE(fault), |
| 1568 | panthor_exception_name(ptdev, CS_EXCEPTION_TYPE(fault)), |
| 1569 | (unsigned int)CS_EXCEPTION_DATA(fault), |
| 1570 | info); |
| 1571 | } |
| 1572 | |
| 1573 | static int group_process_tiler_oom(struct panthor_group *group, u32 cs_id) |
| 1574 | { |
| 1575 | struct panthor_device *ptdev = group->ptdev; |
| 1576 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1577 | u32 renderpasses_in_flight, pending_frag_count; |
| 1578 | struct panthor_heap_pool *heaps = NULL; |
| 1579 | u64 heap_address, new_chunk_va = 0; |
| 1580 | u32 vt_start, vt_end, frag_end; |
| 1581 | int ret, csg_id; |
| 1582 | |
| 1583 | mutex_lock(&sched->lock); |
| 1584 | csg_id = group->csg_id; |
| 1585 | if (csg_id >= 0) { |
| 1586 | struct panthor_fw_cs_iface *cs_iface; |
| 1587 | |
| 1588 | cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); |
| 1589 | heaps = panthor_vm_get_heap_pool(group->vm, false); |
| 1590 | heap_address = cs_iface->output->heap_address; |
| 1591 | vt_start = cs_iface->output->heap_vt_start; |
| 1592 | vt_end = cs_iface->output->heap_vt_end; |
| 1593 | frag_end = cs_iface->output->heap_frag_end; |
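| | /* heap_vt_start/heap_vt_end/heap_frag_end are event counters reported by |
| | * the FW; their differences give the number of render passes still in |
| | * flight and the number of fragment jobs not yet completed. |
| | */ |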
| 1594 | renderpasses_in_flight = vt_start - frag_end; |
| 1595 | pending_frag_count = vt_end - frag_end; |
| 1596 | } |
| 1597 | mutex_unlock(&sched->lock); |
| 1598 | |
| 1599 | /* The group got scheduled out, we stop here. We will get a new tiler OOM event |
| 1600 | * when it's scheduled again. |
| 1601 | */ |
| 1602 | if (unlikely(csg_id < 0)) |
| 1603 | return 0; |
| 1604 | |
| 1605 | if (IS_ERR(heaps) || frag_end > vt_end || vt_end >= vt_start) { |
| 1606 | ret = -EINVAL; |
| 1607 | } else { |
| 1608 | /* We do the allocation without holding the scheduler lock to avoid |
| 1609 | * blocking the scheduling. |
| 1610 | */ |
| 1611 | ret = panthor_heap_grow(heaps, heap_address, |
| 1612 | renderpasses_in_flight, |
| 1613 | pending_frag_count, &new_chunk_va); |
| 1614 | } |
| 1615 | |
| 1616 | /* If the heap context doesn't have memory for us, we want to let the |
| 1617 | * FW try to reclaim memory by waiting for fragment jobs to land or by |
| 1618 | * executing the tiler OOM exception handler, which is supposed to |
| 1619 | * implement incremental rendering. |
| 1620 | */ |
| 1621 | if (ret && ret != -ENOMEM) { |
| 1622 | drm_warn(&ptdev->base, "Failed to extend the tiler heap\n" ); |
| 1623 | group->fatal_queues |= BIT(cs_id); |
| 1624 | sched_queue_delayed_work(sched, tick, 0); |
| 1625 | goto out_put_heap_pool; |
| 1626 | } |
| 1627 | |
| 1628 | mutex_lock(&sched->lock); |
| 1629 | csg_id = group->csg_id; |
| 1630 | if (csg_id >= 0) { |
| 1631 | struct panthor_fw_csg_iface *csg_iface; |
| 1632 | struct panthor_fw_cs_iface *cs_iface; |
| 1633 | |
| 1634 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 1635 | cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); |
| 1636 | |
| 1637 | cs_iface->input->heap_start = new_chunk_va; |
| 1638 | cs_iface->input->heap_end = new_chunk_va; |
| 1639 | panthor_fw_update_reqs(cs_iface, req, cs_iface->output->ack, CS_TILER_OOM); |
| 1640 | panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, BIT(cs_id)); |
| 1641 | panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); |
| 1642 | } |
| 1643 | mutex_unlock(&sched->lock); |
| 1644 | |
| 1645 | /* We allocated a chunk, but couldn't link it to the heap |
| 1646 | * context because the group was scheduled out while we were |
| 1647 | * allocating memory. We need to return this chunk to the heap. |
| 1648 | */ |
| 1649 | if (unlikely(csg_id < 0 && new_chunk_va)) |
| 1650 | panthor_heap_return_chunk(heaps, heap_address, new_chunk_va); |
| 1651 | |
| 1652 | ret = 0; |
| 1653 | |
| 1654 | out_put_heap_pool: |
| 1655 | panthor_heap_pool_put(heaps); |
| 1656 | return ret; |
| 1657 | } |
| 1658 | |
| 1659 | static void group_tiler_oom_work(struct work_struct *work) |
| 1660 | { |
| 1661 | struct panthor_group *group = |
| 1662 | container_of(work, struct panthor_group, tiler_oom_work); |
| 1663 | u32 tiler_oom = atomic_xchg(&group->tiler_oom, 0); |
| 1664 | |
| 1665 | while (tiler_oom) { |
| 1666 | u32 cs_id = ffs(tiler_oom) - 1; |
| 1667 | |
| 1668 | group_process_tiler_oom(group, cs_id); |
| 1669 | tiler_oom &= ~BIT(cs_id); |
| 1670 | } |
| 1671 | |
| 1672 | group_put(group); |
| 1673 | } |
| 1674 | |
| 1675 | static void |
| 1676 | cs_slot_process_tiler_oom_event_locked(struct panthor_device *ptdev, |
| 1677 | u32 csg_id, u32 cs_id) |
| 1678 | { |
| 1679 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1680 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 1681 | struct panthor_group *group = csg_slot->group; |
| 1682 | |
| 1683 | lockdep_assert_held(&sched->lock); |
| 1684 | |
| 1685 | if (drm_WARN_ON(&ptdev->base, !group)) |
| 1686 | return; |
| 1687 | |
| 1688 | atomic_or(BIT(cs_id), &group->tiler_oom); |
| 1689 | |
| 1690 | /* We don't use group_queue_work() here because we want to queue the |
| 1691 | * work item to the heap_alloc_wq. |
| 1692 | */ |
| 1693 | group_get(group); |
| 1694 | if (!queue_work(sched->heap_alloc_wq, &group->tiler_oom_work)) |
| 1695 | group_put(group); |
| 1696 | } |
| 1697 | |
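| | /* Returns true if the caller should ring the CS doorbell after processing, |
| | * i.e. when a fault was acknowledged or a tiler OOM event is being handled. |
| | */ |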
| 1698 | static bool cs_slot_process_irq_locked(struct panthor_device *ptdev, |
| 1699 | u32 csg_id, u32 cs_id) |
| 1700 | { |
| 1701 | struct panthor_fw_cs_iface *cs_iface; |
| 1702 | u32 req, ack, events; |
| 1703 | |
| 1704 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1705 | |
| 1706 | cs_iface = panthor_fw_get_cs_iface(ptdev, csg_id, cs_id); |
| 1707 | req = cs_iface->input->req; |
| 1708 | ack = cs_iface->output->ack; |
| 1709 | events = (req ^ ack) & CS_EVT_MASK; |
| 1710 | |
| 1711 | if (events & CS_FATAL) |
| 1712 | cs_slot_process_fatal_event_locked(ptdev, csg_id, cs_id); |
| 1713 | |
| 1714 | if (events & CS_FAULT) |
| 1715 | cs_slot_process_fault_event_locked(ptdev, csg_id, cs_id); |
| 1716 | |
| 1717 | if (events & CS_TILER_OOM) |
| 1718 | cs_slot_process_tiler_oom_event_locked(ptdev, csg_id, cs_id); |
| 1719 | |
| 1720 | /* We don't acknowledge the TILER_OOM event since its handling is |
| 1721 | * deferred to a separate work. |
| 1722 | */ |
| 1723 | panthor_fw_update_reqs(cs_iface, req, ack, CS_FATAL | CS_FAULT); |
| 1724 | |
| 1725 | return (events & (CS_FAULT | CS_TILER_OOM)) != 0; |
| 1726 | } |
| 1727 | |
| 1728 | static void csg_slot_sync_idle_state_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1729 | { |
| 1730 | struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1731 | struct panthor_fw_csg_iface *csg_iface; |
| 1732 | |
| 1733 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1734 | |
| 1735 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 1736 | csg_slot->idle = csg_iface->output->status_state & CSG_STATUS_STATE_IS_IDLE; |
| 1737 | } |
| 1738 | |
| 1739 | static void csg_slot_process_idle_event_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1740 | { |
| 1741 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1742 | |
| 1743 | lockdep_assert_held(&sched->lock); |
| 1744 | |
| 1745 | sched->might_have_idle_groups = true; |
| 1746 | |
| 1747 | /* Schedule a tick so we can evict idle groups and schedule non-idle |
| 1748 | * ones. This will also update runtime PM and devfreq busy/idle states, |
| 1749 | * so the device can lower its frequency or get suspended. |
| 1750 | */ |
| 1751 | sched_queue_delayed_work(sched, tick, 0); |
| 1752 | } |
| 1753 | |
| 1754 | static void csg_slot_sync_update_locked(struct panthor_device *ptdev, |
| 1755 | u32 csg_id) |
| 1756 | { |
| 1757 | struct panthor_csg_slot *csg_slot = &ptdev->scheduler->csg_slots[csg_id]; |
| 1758 | struct panthor_group *group = csg_slot->group; |
| 1759 | |
| 1760 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1761 | |
| 1762 | if (group) |
| 1763 | group_queue_work(group, sync_upd); |
| 1764 | |
| 1765 | sched_queue_work(ptdev->scheduler, sync_upd); |
| 1766 | } |
| 1767 | |
| 1768 | static void |
| 1769 | csg_slot_process_progress_timer_event_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1770 | { |
| 1771 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1772 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 1773 | struct panthor_group *group = csg_slot->group; |
| 1774 | |
| 1775 | lockdep_assert_held(&sched->lock); |
| 1776 | |
| 1777 | group = csg_slot->group; |
| 1778 | if (!drm_WARN_ON(&ptdev->base, !group)) { |
| 1779 | drm_warn(&ptdev->base, "CSG_PROGRESS_TIMER_EVENT: pid=%d, comm=%s\n" , |
| 1780 | group->task_info.pid, group->task_info.comm); |
| 1781 | |
| 1782 | group->timedout = true; |
| 1783 | } |
| 1784 | |
| 1785 | drm_warn(&ptdev->base, "CSG slot %d progress timeout\n" , csg_id); |
| 1786 | |
| 1787 | sched_queue_delayed_work(sched, tick, 0); |
| 1788 | } |
| 1789 | |
| 1790 | static void sched_process_csg_irq_locked(struct panthor_device *ptdev, u32 csg_id) |
| 1791 | { |
| 1792 | u32 req, ack, cs_irq_req, cs_irq_ack, cs_irqs, csg_events; |
| 1793 | struct panthor_fw_csg_iface *csg_iface; |
| 1794 | u32 ring_cs_db_mask = 0; |
| 1795 | |
| 1796 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1797 | |
| 1798 | if (drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) |
| 1799 | return; |
| 1800 | |
| 1801 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 1802 | req = READ_ONCE(csg_iface->input->req); |
| 1803 | ack = READ_ONCE(csg_iface->output->ack); |
| 1804 | cs_irq_req = READ_ONCE(csg_iface->output->cs_irq_req); |
| 1805 | cs_irq_ack = READ_ONCE(csg_iface->input->cs_irq_ack); |
| 1806 | csg_events = (req ^ ack) & CSG_EVT_MASK; |
| 1807 | |
| 1808 | /* There may not be any pending CSG/CS interrupts to process */ |
| 1809 | if (req == ack && cs_irq_req == cs_irq_ack) |
| 1810 | return; |
| 1811 | |
| 1812 | /* Immediately set IRQ_ACK bits to be same as the IRQ_REQ bits before |
| 1813 | * examining the CS_ACK & CS_REQ bits. This would ensure that Host |
| 1814 | * doesn't miss an interrupt for the CS in the race scenario where |
| 1815 | * whilst Host is servicing an interrupt for the CS, firmware sends |
| 1816 | * another interrupt for that CS. |
| 1817 | */ |
| 1818 | csg_iface->input->cs_irq_ack = cs_irq_req; |
| 1819 | |
| 1820 | panthor_fw_update_reqs(csg_iface, req, ack, |
| 1821 | CSG_SYNC_UPDATE | |
| 1822 | CSG_IDLE | |
| 1823 | CSG_PROGRESS_TIMER_EVENT); |
| 1824 | |
| 1825 | if (csg_events & CSG_IDLE) |
| 1826 | csg_slot_process_idle_event_locked(ptdev, csg_id); |
| 1827 | |
| 1828 | if (csg_events & CSG_PROGRESS_TIMER_EVENT) |
| 1829 | csg_slot_process_progress_timer_event_locked(ptdev, csg_id); |
| 1830 | |
| 1831 | cs_irqs = cs_irq_req ^ cs_irq_ack; |
| 1832 | while (cs_irqs) { |
| 1833 | u32 cs_id = ffs(cs_irqs) - 1; |
| 1834 | |
| 1835 | if (cs_slot_process_irq_locked(ptdev, csg_id, cs_id)) |
| 1836 | ring_cs_db_mask |= BIT(cs_id); |
| 1837 | |
| 1838 | cs_irqs &= ~BIT(cs_id); |
| 1839 | } |
| 1840 | |
| 1841 | if (csg_events & CSG_SYNC_UPDATE) |
| 1842 | csg_slot_sync_update_locked(ptdev, csg_id); |
| 1843 | |
| 1844 | if (ring_cs_db_mask) |
| 1845 | panthor_fw_toggle_reqs(csg_iface, doorbell_req, doorbell_ack, ring_cs_db_mask); |
| 1846 | |
| 1847 | panthor_fw_ring_csg_doorbells(ptdev, BIT(csg_id)); |
| 1848 | } |
| 1849 | |
| 1850 | static void sched_process_idle_event_locked(struct panthor_device *ptdev) |
| 1851 | { |
| 1852 | struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); |
| 1853 | |
| 1854 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1855 | |
| 1856 | /* Acknowledge the idle event and schedule a tick. */ |
| 1857 | panthor_fw_update_reqs(glb_iface, req, glb_iface->output->ack, GLB_IDLE); |
| 1858 | sched_queue_delayed_work(ptdev->scheduler, tick, 0); |
| 1859 | } |
| 1860 | |
| 1861 | /** |
| 1862 | * sched_process_global_irq_locked() - Process the scheduling part of a global IRQ |
| 1863 | * @ptdev: Device. |
| 1864 | */ |
| 1865 | static void sched_process_global_irq_locked(struct panthor_device *ptdev) |
| 1866 | { |
| 1867 | struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); |
| 1868 | u32 req, ack, evts; |
| 1869 | |
| 1870 | lockdep_assert_held(&ptdev->scheduler->lock); |
| 1871 | |
| 1872 | req = READ_ONCE(glb_iface->input->req); |
| 1873 | ack = READ_ONCE(glb_iface->output->ack); |
| 1874 | evts = (req ^ ack) & GLB_EVT_MASK; |
| 1875 | |
| 1876 | if (evts & GLB_IDLE) |
| 1877 | sched_process_idle_event_locked(ptdev); |
| 1878 | } |
| 1879 | |
| 1880 | static void process_fw_events_work(struct work_struct *work) |
| 1881 | { |
| 1882 | struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, |
| 1883 | fw_events_work); |
| 1884 | u32 events = atomic_xchg(&sched->fw_events, 0); |
| 1885 | struct panthor_device *ptdev = sched->ptdev; |
| 1886 | |
| 1887 | mutex_lock(&sched->lock); |
| 1888 | |
| 1889 | if (events & JOB_INT_GLOBAL_IF) { |
| 1890 | sched_process_global_irq_locked(ptdev); |
| 1891 | events &= ~JOB_INT_GLOBAL_IF; |
| 1892 | } |
| 1893 | |
| 1894 | while (events) { |
| 1895 | u32 csg_id = ffs(events) - 1; |
| 1896 | |
| 1897 | sched_process_csg_irq_locked(ptdev, csg_id); |
| 1898 | events &= ~BIT(csg_id); |
| 1899 | } |
| 1900 | |
| 1901 | mutex_unlock(&sched->lock); |
| 1902 | } |
| 1903 | |
| 1904 | /** |
| 1905 | * panthor_sched_report_fw_events() - Report FW events to the scheduler. |
| | * @ptdev: Device. |
| | * @events: Events to report. |
| 1906 | */ |
| 1907 | void panthor_sched_report_fw_events(struct panthor_device *ptdev, u32 events) |
| 1908 | { |
| 1909 | if (!ptdev->scheduler) |
| 1910 | return; |
| 1911 | |
| 1912 | atomic_or(events, &ptdev->scheduler->fw_events); |
| 1913 | sched_queue_work(ptdev->scheduler, fw_events); |
| 1914 | } |
| 1915 | |
| 1916 | static const char *fence_get_driver_name(struct dma_fence *fence) |
| 1917 | { |
| 1918 | return "panthor" ; |
| 1919 | } |
| 1920 | |
| 1921 | static const char *queue_fence_get_timeline_name(struct dma_fence *fence) |
| 1922 | { |
| 1923 | return "queue-fence" ; |
| 1924 | } |
| 1925 | |
| 1926 | static const struct dma_fence_ops panthor_queue_fence_ops = { |
| 1927 | .get_driver_name = fence_get_driver_name, |
| 1928 | .get_timeline_name = queue_fence_get_timeline_name, |
| 1929 | }; |
| 1930 | |
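| | /* Context used to batch CSG slot request updates: requests are queued |
| | * per slot, then applied in one go by csgs_upd_ctx_apply_locked(), which |
| | * rings the doorbells once and waits for all acks. |
| | */ |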
| 1931 | struct panthor_csg_slots_upd_ctx { |
| 1932 | u32 update_mask; |
| 1933 | u32 timedout_mask; |
| 1934 | struct { |
| 1935 | u32 value; |
| 1936 | u32 mask; |
| 1937 | } requests[MAX_CSGS]; |
| 1938 | }; |
| 1939 | |
| 1940 | static void csgs_upd_ctx_init(struct panthor_csg_slots_upd_ctx *ctx) |
| 1941 | { |
| 1942 | memset(ctx, 0, sizeof(*ctx)); |
| 1943 | } |
| 1944 | |
| 1945 | static void csgs_upd_ctx_queue_reqs(struct panthor_device *ptdev, |
| 1946 | struct panthor_csg_slots_upd_ctx *ctx, |
| 1947 | u32 csg_id, u32 value, u32 mask) |
| 1948 | { |
| 1949 | if (drm_WARN_ON(&ptdev->base, !mask) || |
| 1950 | drm_WARN_ON(&ptdev->base, csg_id >= ptdev->scheduler->csg_slot_count)) |
| 1951 | return; |
| 1952 | |
| 1953 | ctx->requests[csg_id].value = (ctx->requests[csg_id].value & ~mask) | (value & mask); |
| 1954 | ctx->requests[csg_id].mask |= mask; |
| 1955 | ctx->update_mask |= BIT(csg_id); |
| 1956 | } |
| 1957 | |
| 1958 | static int csgs_upd_ctx_apply_locked(struct panthor_device *ptdev, |
| 1959 | struct panthor_csg_slots_upd_ctx *ctx) |
| 1960 | { |
| 1961 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 1962 | u32 update_slots = ctx->update_mask; |
| 1963 | |
| 1964 | lockdep_assert_held(&sched->lock); |
| 1965 | |
| 1966 | if (!ctx->update_mask) |
| 1967 | return 0; |
| 1968 | |
| 1969 | while (update_slots) { |
| 1970 | struct panthor_fw_csg_iface *csg_iface; |
| 1971 | u32 csg_id = ffs(update_slots) - 1; |
| 1972 | |
| 1973 | update_slots &= ~BIT(csg_id); |
| 1974 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 1975 | panthor_fw_update_reqs(csg_iface, req, |
| 1976 | ctx->requests[csg_id].value, |
| 1977 | ctx->requests[csg_id].mask); |
| 1978 | } |
| 1979 | |
| 1980 | panthor_fw_ring_csg_doorbells(ptdev, ctx->update_mask); |
| 1981 | |
| 1982 | update_slots = ctx->update_mask; |
| 1983 | while (update_slots) { |
| 1984 | struct panthor_fw_csg_iface *csg_iface; |
| 1985 | u32 csg_id = ffs(update_slots) - 1; |
| 1986 | u32 req_mask = ctx->requests[csg_id].mask, acked; |
| 1987 | int ret; |
| 1988 | |
| 1989 | update_slots &= ~BIT(csg_id); |
| 1990 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 1991 | |
| 1992 | ret = panthor_fw_csg_wait_acks(ptdev, csg_id, req_mask, &acked, 100); |
| 1993 | |
| 1994 | if (acked & CSG_ENDPOINT_CONFIG) |
| 1995 | csg_slot_sync_priority_locked(ptdev, csg_id); |
| 1996 | |
| 1997 | if (acked & CSG_STATE_MASK) |
| 1998 | csg_slot_sync_state_locked(ptdev, csg_id); |
| 1999 | |
| 2000 | if (acked & CSG_STATUS_UPDATE) { |
| 2001 | csg_slot_sync_queues_state_locked(ptdev, csg_id); |
| 2002 | csg_slot_sync_idle_state_locked(ptdev, csg_id); |
| 2003 | } |
| 2004 | |
| 2005 | if (ret && acked != req_mask && |
| 2006 | ((csg_iface->input->req ^ csg_iface->output->ack) & req_mask) != 0) { |
| 2007 | drm_err(&ptdev->base, "CSG %d update request timedout" , csg_id); |
| 2008 | ctx->timedout_mask |= BIT(csg_id); |
| 2009 | } |
| 2010 | } |
| 2011 | |
| 2012 | if (ctx->timedout_mask) |
| 2013 | return -ETIMEDOUT; |
| 2014 | |
| 2015 | return 0; |
| 2016 | } |
| 2017 | |
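| | /* Per-tick scheduling context: old_groups collects the groups currently |
| | * bound to CSG slots, groups collects the ones picked for the next |
| | * scheduling window, both indexed by group priority. |
| | */ |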
| 2018 | struct panthor_sched_tick_ctx { |
| 2019 | struct list_head old_groups[PANTHOR_CSG_PRIORITY_COUNT]; |
| 2020 | struct list_head groups[PANTHOR_CSG_PRIORITY_COUNT]; |
| 2021 | u32 idle_group_count; |
| 2022 | u32 group_count; |
| 2023 | enum panthor_csg_priority min_priority; |
| 2024 | struct panthor_vm *vms[MAX_CS_PER_CSG]; |
| 2025 | u32 as_count; |
| 2026 | bool immediate_tick; |
| 2027 | u32 csg_upd_failed_mask; |
| 2028 | }; |
| 2029 | |
| 2030 | static bool |
| 2031 | tick_ctx_is_full(const struct panthor_scheduler *sched, |
| 2032 | const struct panthor_sched_tick_ctx *ctx) |
| 2033 | { |
| 2034 | return ctx->group_count == sched->csg_slot_count; |
| 2035 | } |
| 2036 | |
| 2037 | static void |
| 2038 | tick_ctx_pick_groups_from_list(const struct panthor_scheduler *sched, |
| 2039 | struct panthor_sched_tick_ctx *ctx, |
| 2040 | struct list_head *queue, |
| 2041 | bool skip_idle_groups, |
| 2042 | bool owned_by_tick_ctx) |
| 2043 | { |
| 2044 | struct panthor_group *group, *tmp; |
| 2045 | |
| 2046 | if (tick_ctx_is_full(sched, ctx)) |
| 2047 | return; |
| 2048 | |
| 2049 | list_for_each_entry_safe(group, tmp, queue, run_node) { |
| 2050 | u32 i; |
| 2051 | |
| 2052 | if (!group_can_run(group)) |
| 2053 | continue; |
| 2054 | |
| 2055 | if (skip_idle_groups && group_is_idle(group)) |
| 2056 | continue; |
| 2057 | |
| 2058 | for (i = 0; i < ctx->as_count; i++) { |
| 2059 | if (ctx->vms[i] == group->vm) |
| 2060 | break; |
| 2061 | } |
| 2062 | |
| 2063 | if (i == ctx->as_count && ctx->as_count == sched->as_slot_count) |
| 2064 | continue; |
| 2065 | |
| 2066 | if (!owned_by_tick_ctx) |
| 2067 | group_get(group); |
| 2068 | |
| 2069 | list_move_tail(&group->run_node, &ctx->groups[group->priority]); |
| 2070 | ctx->group_count++; |
| 2071 | if (group_is_idle(group)) |
| 2072 | ctx->idle_group_count++; |
| 2073 | |
| 2074 | if (i == ctx->as_count) |
| 2075 | ctx->vms[ctx->as_count++] = group->vm; |
| 2076 | |
| 2077 | if (ctx->min_priority > group->priority) |
| 2078 | ctx->min_priority = group->priority; |
| 2079 | |
| 2080 | if (tick_ctx_is_full(sched, ctx)) |
| 2081 | return; |
| 2082 | } |
| 2083 | } |
| 2084 | |
| 2085 | static void |
| 2086 | tick_ctx_insert_old_group(struct panthor_scheduler *sched, |
| 2087 | struct panthor_sched_tick_ctx *ctx, |
| 2088 | struct panthor_group *group, |
| 2089 | bool full_tick) |
| 2090 | { |
| 2091 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[group->csg_id]; |
| 2092 | struct panthor_group *other_group; |
| 2093 | |
| 2094 | if (!full_tick) { |
| 2095 | list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); |
| 2096 | return; |
| 2097 | } |
| 2098 | |
| 2099 | /* Rotate to make sure groups with lower CSG slot |
| 2100 | * priorities have a chance to get a higher CSG slot |
| 2101 | * priority next time they get picked. This priority |
| 2102 | * has an impact on resource request ordering, so it's |
| 2103 | * important to make sure we don't let one group starve |
| 2104 | * all other groups with the same group priority. |
| 2105 | */ |
| 2106 | list_for_each_entry(other_group, |
| 2107 | &ctx->old_groups[csg_slot->group->priority], |
| 2108 | run_node) { |
| 2109 | struct panthor_csg_slot *other_csg_slot = &sched->csg_slots[other_group->csg_id]; |
| 2110 | |
| 2111 | if (other_csg_slot->priority > csg_slot->priority) { |
| 2112 | list_add_tail(&csg_slot->group->run_node, &other_group->run_node); |
| 2113 | return; |
| 2114 | } |
| 2115 | } |
| 2116 | |
| 2117 | list_add_tail(&group->run_node, &ctx->old_groups[group->priority]); |
| 2118 | } |
| 2119 | |
| 2120 | static void |
| 2121 | tick_ctx_init(struct panthor_scheduler *sched, |
| 2122 | struct panthor_sched_tick_ctx *ctx, |
| 2123 | bool full_tick) |
| 2124 | { |
| 2125 | struct panthor_device *ptdev = sched->ptdev; |
| 2126 | struct panthor_csg_slots_upd_ctx upd_ctx; |
| 2127 | int ret; |
| 2128 | u32 i; |
| 2129 | |
| 2130 | memset(ctx, 0, sizeof(*ctx)); |
| 2131 | csgs_upd_ctx_init(&upd_ctx); |
| 2132 | |
| 2133 | ctx->min_priority = PANTHOR_CSG_PRIORITY_COUNT; |
| 2134 | for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { |
| 2135 | INIT_LIST_HEAD(&ctx->groups[i]); |
| 2136 | INIT_LIST_HEAD(&ctx->old_groups[i]); |
| 2137 | } |
| 2138 | |
| 2139 | for (i = 0; i < sched->csg_slot_count; i++) { |
| 2140 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; |
| 2141 | struct panthor_group *group = csg_slot->group; |
| 2142 | struct panthor_fw_csg_iface *csg_iface; |
| 2143 | |
| 2144 | if (!group) |
| 2145 | continue; |
| 2146 | |
| 2147 | csg_iface = panthor_fw_get_csg_iface(ptdev, i); |
| 2148 | group_get(group); |
| 2149 | |
| 2150 | /* If there were unhandled faults on the VM, force processing of |
| 2151 | * CSG IRQs, so we can flag the faulty queue. |
| 2152 | */ |
| 2153 | if (panthor_vm_has_unhandled_faults(group->vm)) { |
| 2154 | sched_process_csg_irq_locked(ptdev, i); |
| 2155 | |
| 2156 | /* No fatal fault reported, flag all queues as faulty. */ |
| 2157 | if (!group->fatal_queues) |
| 2158 | group->fatal_queues |= GENMASK(group->queue_count - 1, 0); |
| 2159 | } |
| 2160 | |
| 2161 | tick_ctx_insert_old_group(sched, ctx, group, full_tick); |
| 2162 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, |
| 2163 | csg_iface->output->ack ^ CSG_STATUS_UPDATE, |
| 2164 | CSG_STATUS_UPDATE); |
| 2165 | } |
| 2166 | |
| 2167 | ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); |
| 2168 | if (ret) { |
| 2169 | panthor_device_schedule_reset(ptdev); |
| 2170 | ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; |
| 2171 | } |
| 2172 | } |
| 2173 | |
| 2174 | static void |
| 2175 | group_term_post_processing(struct panthor_group *group) |
| 2176 | { |
| 2177 | struct panthor_job *job, *tmp; |
| 2178 | LIST_HEAD(faulty_jobs); |
| 2179 | bool cookie; |
| 2180 | u32 i = 0; |
| 2181 | |
| 2182 | if (drm_WARN_ON(&group->ptdev->base, group_can_run(group))) |
| 2183 | return; |
| 2184 | |
| 2185 | cookie = dma_fence_begin_signalling(); |
| 2186 | for (i = 0; i < group->queue_count; i++) { |
| 2187 | struct panthor_queue *queue = group->queues[i]; |
| 2188 | struct panthor_syncobj_64b *syncobj; |
| 2189 | int err; |
| 2190 | |
| 2191 | if (group->fatal_queues & BIT(i)) |
| 2192 | err = -EINVAL; |
| 2193 | else if (group->timedout) |
| 2194 | err = -ETIMEDOUT; |
| 2195 | else |
| 2196 | err = -ECANCELED; |
| 2197 | |
| 2198 | if (!queue) |
| 2199 | continue; |
| 2200 | |
| 2201 | spin_lock(&queue->fence_ctx.lock); |
| 2202 | list_for_each_entry_safe(job, tmp, &queue->fence_ctx.in_flight_jobs, node) { |
| 2203 | list_move_tail(&job->node, &faulty_jobs); |
| 2204 | dma_fence_set_error(job->done_fence, err); |
| 2205 | dma_fence_signal_locked(job->done_fence); |
| 2206 | } |
| 2207 | spin_unlock(&queue->fence_ctx.lock); |
| 2208 | |
| 2209 | /* Manually update the syncobj seqno to unblock waiters. */ |
| 2210 | syncobj = group->syncobjs->kmap + (i * sizeof(*syncobj)); |
| 2211 | syncobj->status = ~0; |
| 2212 | syncobj->seqno = atomic64_read(&queue->fence_ctx.seqno); |
| 2213 | sched_queue_work(group->ptdev->scheduler, sync_upd); |
| 2214 | } |
| 2215 | dma_fence_end_signalling(cookie); |
| 2216 | |
| 2217 | list_for_each_entry_safe(job, tmp, &faulty_jobs, node) { |
| 2218 | list_del_init(&job->node); |
| 2219 | panthor_job_put(&job->base); |
| 2220 | } |
| 2221 | } |
| 2222 | |
| 2223 | static void group_term_work(struct work_struct *work) |
| 2224 | { |
| 2225 | struct panthor_group *group = |
| 2226 | container_of(work, struct panthor_group, term_work); |
| 2227 | |
| 2228 | group_term_post_processing(group); |
| 2229 | group_put(group); |
| 2230 | } |
| 2231 | |
| 2232 | static void |
| 2233 | tick_ctx_cleanup(struct panthor_scheduler *sched, |
| 2234 | struct panthor_sched_tick_ctx *ctx) |
| 2235 | { |
| 2236 | struct panthor_device *ptdev = sched->ptdev; |
| 2237 | struct panthor_group *group, *tmp; |
| 2238 | u32 i; |
| 2239 | |
| 2240 | for (i = 0; i < ARRAY_SIZE(ctx->old_groups); i++) { |
| 2241 | list_for_each_entry_safe(group, tmp, &ctx->old_groups[i], run_node) { |
| 2242 | /* If everything went fine, we should only have groups |
| 2243 | * to be terminated in the old_groups lists. |
| 2244 | */ |
| 2245 | drm_WARN_ON(&ptdev->base, !ctx->csg_upd_failed_mask && |
| 2246 | group_can_run(group)); |
| 2247 | |
| 2248 | if (!group_can_run(group)) { |
| 2249 | list_del_init(&group->run_node); |
| 2250 | list_del_init(&group->wait_node); |
| 2251 | group_queue_work(group, term); |
| 2252 | } else if (group->csg_id >= 0) { |
| 2253 | list_del_init(&group->run_node); |
| 2254 | } else { |
| 2255 | list_move(&group->run_node, |
| 2256 | group_is_idle(group) ? |
| 2257 | &sched->groups.idle[group->priority] : |
| 2258 | &sched->groups.runnable[group->priority]); |
| 2259 | } |
| 2260 | group_put(group); |
| 2261 | } |
| 2262 | } |
| 2263 | |
| 2264 | for (i = 0; i < ARRAY_SIZE(ctx->groups); i++) { |
| 2265 | /* If everything went fine, the groups to schedule lists should |
| 2266 | * be empty. |
| 2267 | */ |
| 2268 | drm_WARN_ON(&ptdev->base, |
| 2269 | !ctx->csg_upd_failed_mask && !list_empty(&ctx->groups[i])); |
| 2270 | |
| 2271 | list_for_each_entry_safe(group, tmp, &ctx->groups[i], run_node) { |
| 2272 | if (group->csg_id >= 0) { |
| 2273 | list_del_init(&group->run_node); |
| 2274 | } else { |
| 2275 | list_move(&group->run_node, |
| 2276 | group_is_idle(group) ? |
| 2277 | &sched->groups.idle[group->priority] : |
| 2278 | &sched->groups.runnable[group->priority]); |
| 2279 | } |
| 2280 | group_put(group); |
| 2281 | } |
| 2282 | } |
| 2283 | } |
| 2284 | |
| 2285 | static void |
| 2286 | tick_ctx_apply(struct panthor_scheduler *sched, struct panthor_sched_tick_ctx *ctx) |
| 2287 | { |
| 2288 | struct panthor_group *group, *tmp; |
| 2289 | struct panthor_device *ptdev = sched->ptdev; |
| 2290 | struct panthor_csg_slot *csg_slot; |
| 2291 | int prio, new_csg_prio = MAX_CSG_PRIO, i; |
| 2292 | u32 free_csg_slots = 0; |
| 2293 | struct panthor_csg_slots_upd_ctx upd_ctx; |
| 2294 | int ret; |
| 2295 | |
| 2296 | csgs_upd_ctx_init(&upd_ctx); |
| 2297 | |
| 2298 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { |
| 2299 | /* Suspend or terminate evicted groups. */ |
| 2300 | list_for_each_entry(group, &ctx->old_groups[prio], run_node) { |
| 2301 | bool term = !group_can_run(group); |
| 2302 | int csg_id = group->csg_id; |
| 2303 | |
| 2304 | if (drm_WARN_ON(&ptdev->base, csg_id < 0)) |
| 2305 | continue; |
| 2306 | |
| 2307 | csg_slot = &sched->csg_slots[csg_id]; |
| 2308 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, |
| 2309 | term ? CSG_STATE_TERMINATE : CSG_STATE_SUSPEND, |
| 2310 | CSG_STATE_MASK); |
| 2311 | } |
| 2312 | |
| 2313 | /* Update priorities on already running groups. */ |
| 2314 | list_for_each_entry(group, &ctx->groups[prio], run_node) { |
| 2315 | struct panthor_fw_csg_iface *csg_iface; |
| 2316 | int csg_id = group->csg_id; |
| 2317 | |
| 2318 | if (csg_id < 0) { |
| 2319 | new_csg_prio--; |
| 2320 | continue; |
| 2321 | } |
| 2322 | |
| 2323 | csg_slot = &sched->csg_slots[csg_id]; |
| 2324 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 2325 | if (csg_slot->priority == new_csg_prio) { |
| 2326 | new_csg_prio--; |
| 2327 | continue; |
| 2328 | } |
| 2329 | |
| 2330 | panthor_fw_csg_endpoint_req_update(ptdev, csg_iface, |
| 2331 | CSG_EP_REQ_PRIORITY(new_csg_prio), |
| 2332 | CSG_EP_REQ_PRIORITY_MASK); |
| 2333 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, |
| 2334 | csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, |
| 2335 | CSG_ENDPOINT_CONFIG); |
| 2336 | new_csg_prio--; |
| 2337 | } |
| 2338 | } |
| 2339 | |
| 2340 | ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); |
| 2341 | if (ret) { |
| 2342 | panthor_device_schedule_reset(ptdev); |
| 2343 | ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; |
| 2344 | return; |
| 2345 | } |
| 2346 | |
| 2347 | /* Unbind evicted groups. */ |
| 2348 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { |
| 2349 | list_for_each_entry(group, &ctx->old_groups[prio], run_node) { |
| 2350 | /* This group is gone. Process interrupts to clear |
| 2351 | * any pending interrupts before we start the new |
| 2352 | * group. |
| 2353 | */ |
| 2354 | if (group->csg_id >= 0) |
| 2355 | sched_process_csg_irq_locked(ptdev, group->csg_id); |
| 2356 | |
| 2357 | group_unbind_locked(group); |
| 2358 | } |
| 2359 | } |
| 2360 | |
| 2361 | for (i = 0; i < sched->csg_slot_count; i++) { |
| 2362 | if (!sched->csg_slots[i].group) |
| 2363 | free_csg_slots |= BIT(i); |
| 2364 | } |
| 2365 | |
| 2366 | csgs_upd_ctx_init(&upd_ctx); |
| 2367 | new_csg_prio = MAX_CSG_PRIO; |
| 2368 | |
| 2369 | /* Start new groups. */ |
| 2370 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { |
| 2371 | list_for_each_entry(group, &ctx->groups[prio], run_node) { |
| 2372 | int csg_id = group->csg_id; |
| 2373 | struct panthor_fw_csg_iface *csg_iface; |
| 2374 | |
| 2375 | if (csg_id >= 0) { |
| 2376 | new_csg_prio--; |
| 2377 | continue; |
| 2378 | } |
| 2379 | |
| 2380 | csg_id = ffs(free_csg_slots) - 1; |
| 2381 | if (drm_WARN_ON(&ptdev->base, csg_id < 0)) |
| 2382 | break; |
| 2383 | |
| 2384 | csg_iface = panthor_fw_get_csg_iface(ptdev, csg_id); |
| 2385 | csg_slot = &sched->csg_slots[csg_id]; |
| 2386 | group_bind_locked(group, csg_id); |
| 2387 | csg_slot_prog_locked(ptdev, csg_id, new_csg_prio--); |
| 2388 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, |
| 2389 | group->state == PANTHOR_CS_GROUP_SUSPENDED ? |
| 2390 | CSG_STATE_RESUME : CSG_STATE_START, |
| 2391 | CSG_STATE_MASK); |
| 2392 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, |
| 2393 | csg_iface->output->ack ^ CSG_ENDPOINT_CONFIG, |
| 2394 | CSG_ENDPOINT_CONFIG); |
| 2395 | free_csg_slots &= ~BIT(csg_id); |
| 2396 | } |
| 2397 | } |
| 2398 | |
| 2399 | ret = csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); |
| 2400 | if (ret) { |
| 2401 | panthor_device_schedule_reset(ptdev); |
| 2402 | ctx->csg_upd_failed_mask |= upd_ctx.timedout_mask; |
| 2403 | return; |
| 2404 | } |
| 2405 | |
| 2406 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { |
| 2407 | list_for_each_entry_safe(group, tmp, &ctx->groups[prio], run_node) { |
| 2408 | list_del_init(&group->run_node); |
| 2409 | |
| 2410 | /* If the group has been destroyed while we were |
| 2411 | * scheduling, ask for an immediate tick to |
| 2412 | * re-evaluate as soon as possible and get rid of |
| 2413 | * this dangling group. |
| 2414 | */ |
| 2415 | if (group->destroyed) |
| 2416 | ctx->immediate_tick = true; |
| 2417 | group_put(group); |
| 2418 | } |
| 2419 | |
| 2420 | /* Return evicted groups to the idle or run queues. Groups |
| 2421 | * that can no longer be run (because they've been destroyed |
| 2422 | * or experienced an unrecoverable error) will be scheduled |
| 2423 | * for destruction in tick_ctx_cleanup(). |
| 2424 | */ |
| 2425 | list_for_each_entry_safe(group, tmp, &ctx->old_groups[prio], run_node) { |
| 2426 | if (!group_can_run(group)) |
| 2427 | continue; |
| 2428 | |
| 2429 | if (group_is_idle(group)) |
| 2430 | list_move_tail(&group->run_node, &sched->groups.idle[prio]); |
| 2431 | else |
| 2432 | list_move_tail(&group->run_node, &sched->groups.runnable[prio]); |
| 2433 | group_put(group); |
| 2434 | } |
| 2435 | } |
| 2436 | |
| 2437 | sched->used_csg_slot_count = ctx->group_count; |
| 2438 | sched->might_have_idle_groups = ctx->idle_group_count > 0; |
| 2439 | } |
| 2440 | |
| 2441 | static u64 |
| 2442 | tick_ctx_update_resched_target(struct panthor_scheduler *sched, |
| 2443 | const struct panthor_sched_tick_ctx *ctx) |
| 2444 | { |
| 2445 | /* We had space left, no need to reschedule until some external event happens. */ |
| 2446 | if (!tick_ctx_is_full(sched, ctx)) |
| 2447 | goto no_tick; |
| 2448 | |
| 2449 | /* If idle groups were scheduled, no need to wake up until some external |
| 2450 | * event happens (group unblocked, new job submitted, ...). |
| 2451 | */ |
| 2452 | if (ctx->idle_group_count) |
| 2453 | goto no_tick; |
| 2454 | |
| 2455 | if (drm_WARN_ON(&sched->ptdev->base, ctx->min_priority >= PANTHOR_CSG_PRIORITY_COUNT)) |
| 2456 | goto no_tick; |
| 2457 | |
| 2458 | /* If there are groups of the same priority waiting, we need to |
| 2459 | * keep the scheduler ticking, otherwise, we'll just wait for |
| 2460 | * new groups with higher priority to be queued. |
| 2461 | */ |
| 2462 | if (!list_empty(&sched->groups.runnable[ctx->min_priority])) { |
| 2463 | u64 resched_target = sched->last_tick + sched->tick_period; |
| 2464 | |
| 2465 | if (time_before64(sched->resched_target, sched->last_tick) || |
| 2466 | time_before64(resched_target, sched->resched_target)) |
| 2467 | sched->resched_target = resched_target; |
| 2468 | |
| 2469 | return sched->resched_target - sched->last_tick; |
| 2470 | } |
| 2471 | |
| 2472 | no_tick: |
| 2473 | sched->resched_target = U64_MAX; |
| 2474 | return U64_MAX; |
| 2475 | } |
| 2476 | |
| 2477 | static void tick_work(struct work_struct *work) |
| 2478 | { |
| 2479 | struct panthor_scheduler *sched = container_of(work, struct panthor_scheduler, |
| 2480 | tick_work.work); |
| 2481 | struct panthor_device *ptdev = sched->ptdev; |
| 2482 | struct panthor_sched_tick_ctx ctx; |
| 2483 | u64 remaining_jiffies = 0, resched_delay; |
| 2484 | u64 now = get_jiffies_64(); |
| 2485 | int prio, ret, cookie; |
| 2486 | |
| 2487 | if (!drm_dev_enter(&ptdev->base, &cookie)) |
| 2488 | return; |
| 2489 | |
| 2490 | ret = panthor_device_resume_and_get(ptdev); |
| 2491 | if (drm_WARN_ON(&ptdev->base, ret)) |
| 2492 | goto out_dev_exit; |
| 2493 | |
| 2494 | if (time_before64(now, sched->resched_target)) |
| 2495 | remaining_jiffies = sched->resched_target - now; |
| 2496 | |
| 2497 | mutex_lock(&sched->lock); |
| 2498 | if (panthor_device_reset_is_pending(sched->ptdev)) |
| 2499 | goto out_unlock; |
| 2500 | |
| 2501 | tick_ctx_init(sched, &ctx, remaining_jiffies != 0); |
| 2502 | if (ctx.csg_upd_failed_mask) |
| 2503 | goto out_cleanup_ctx; |
| 2504 | |
| 2505 | if (remaining_jiffies) { |
| 2506 | /* Scheduling forced in the middle of a tick. Only RT groups |
| 2507 | * can preempt non-RT ones. Currently running RT groups can't be |
| 2508 | * preempted. |
| 2509 | */ |
| 2510 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; |
| 2511 | prio >= 0 && !tick_ctx_is_full(sched, &ctx); |
| 2512 | prio--) { |
| 2513 | tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], |
| 2514 | true, true); |
| 2515 | if (prio == PANTHOR_CSG_PRIORITY_RT) { |
| 2516 | tick_ctx_pick_groups_from_list(sched, &ctx, |
| 2517 | &sched->groups.runnable[prio], |
| 2518 | true, false); |
| 2519 | } |
| 2520 | } |
| 2521 | } |
| 2522 | |
| 2523 | /* First pick non-idle groups */ |
| 2524 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; |
| 2525 | prio >= 0 && !tick_ctx_is_full(sched, &ctx); |
| 2526 | prio--) { |
| 2527 | tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.runnable[prio], |
| 2528 | true, false); |
| 2529 | tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], true, true); |
| 2530 | } |
| 2531 | |
| 2532 | /* If we have free CSG slots left, pick idle groups */ |
| 2533 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; |
| 2534 | prio >= 0 && !tick_ctx_is_full(sched, &ctx); |
| 2535 | prio--) { |
| 2536 | /* Check the old_group queue first to avoid reprogramming the slots */ |
| 2537 | tick_ctx_pick_groups_from_list(sched, &ctx, &ctx.old_groups[prio], false, true); |
| 2538 | tick_ctx_pick_groups_from_list(sched, &ctx, &sched->groups.idle[prio], |
| 2539 | false, false); |
| 2540 | } |
| 2541 | |
| 2542 | tick_ctx_apply(sched, &ctx); |
| 2543 | if (ctx.csg_upd_failed_mask) |
| 2544 | goto out_cleanup_ctx; |
| 2545 | |
| 2546 | if (ctx.idle_group_count == ctx.group_count) { |
| 2547 | panthor_devfreq_record_idle(sched->ptdev); |
| 2548 | if (sched->pm.has_ref) { |
| 2549 | pm_runtime_put_autosuspend(ptdev->base.dev); |
| 2550 | sched->pm.has_ref = false; |
| 2551 | } |
| 2552 | } else { |
| 2553 | panthor_devfreq_record_busy(sched->ptdev); |
| 2554 | if (!sched->pm.has_ref) { |
| 2555 | pm_runtime_get(ptdev->base.dev); |
| 2556 | sched->pm.has_ref = true; |
| 2557 | } |
| 2558 | } |
| 2559 | |
| 2560 | sched->last_tick = now; |
| 2561 | resched_delay = tick_ctx_update_resched_target(sched, &ctx); |
| 2562 | if (ctx.immediate_tick) |
| 2563 | resched_delay = 0; |
| 2564 | |
| 2565 | if (resched_delay != U64_MAX) |
| 2566 | sched_queue_delayed_work(sched, tick, resched_delay); |
| 2567 | |
| 2568 | out_cleanup_ctx: |
| 2569 | tick_ctx_cleanup(sched, &ctx); |
| 2570 | |
| 2571 | out_unlock: |
| 2572 | mutex_unlock(&sched->lock); |
| 2573 | pm_runtime_mark_last_busy(ptdev->base.dev); |
| 2574 | pm_runtime_put_autosuspend(ptdev->base.dev); |
| 2575 | |
| 2576 | out_dev_exit: |
| 2577 | drm_dev_exit(cookie); |
| 2578 | } |
| 2579 | |
| 2580 | static int panthor_queue_eval_syncwait(struct panthor_group *group, u8 queue_idx) |
| 2581 | { |
| 2582 | struct panthor_queue *queue = group->queues[queue_idx]; |
| 2583 | union { |
| 2584 | struct panthor_syncobj_64b sync64; |
| 2585 | struct panthor_syncobj_32b sync32; |
| 2586 | } *syncobj; |
| 2587 | bool result; |
| 2588 | u64 value; |
| 2589 | |
| 2590 | syncobj = panthor_queue_get_syncwait_obj(group, queue); |
| 2591 | if (!syncobj) |
| 2592 | return -EINVAL; |
| 2593 | |
| 2594 | value = queue->syncwait.sync64 ? |
| 2595 | syncobj->sync64.seqno : |
| 2596 | syncobj->sync32.seqno; |
| 2597 | |
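| | /* The wait condition is either 'greater than' or 'less or equal', |
| | * depending on the wait operation that blocked the queue. |
| | */ |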
| 2598 | if (queue->syncwait.gt) |
| 2599 | result = value > queue->syncwait.ref; |
| 2600 | else |
| 2601 | result = value <= queue->syncwait.ref; |
| 2602 | |
| 2603 | if (result) |
| 2604 | panthor_queue_put_syncwait_obj(queue); |
| 2605 | |
| 2606 | return result; |
| 2607 | } |
| 2608 | |
| 2609 | static void sync_upd_work(struct work_struct *work) |
| 2610 | { |
| 2611 | struct panthor_scheduler *sched = container_of(work, |
| 2612 | struct panthor_scheduler, |
| 2613 | sync_upd_work); |
| 2614 | struct panthor_group *group, *tmp; |
| 2615 | bool immediate_tick = false; |
| 2616 | |
| 2617 | mutex_lock(&sched->lock); |
| 2618 | list_for_each_entry_safe(group, tmp, &sched->groups.waiting, wait_node) { |
| 2619 | u32 tested_queues = group->blocked_queues; |
| 2620 | u32 unblocked_queues = 0; |
| 2621 | |
| 2622 | while (tested_queues) { |
| 2623 | u32 cs_id = ffs(tested_queues) - 1; |
| 2624 | int ret; |
| 2625 | |
| 2626 | ret = panthor_queue_eval_syncwait(group, cs_id); |
| 2627 | drm_WARN_ON(&group->ptdev->base, ret < 0); |
| 2628 | if (ret) |
| 2629 | unblocked_queues |= BIT(cs_id); |
| 2630 | |
| 2631 | tested_queues &= ~BIT(cs_id); |
| 2632 | } |
| 2633 | |
| 2634 | if (unblocked_queues) { |
| 2635 | group->blocked_queues &= ~unblocked_queues; |
| 2636 | |
| 2637 | if (group->csg_id < 0) { |
| 2638 | list_move(&group->run_node, |
| 2639 | &sched->groups.runnable[group->priority]); |
| 2640 | if (group->priority == PANTHOR_CSG_PRIORITY_RT) |
| 2641 | immediate_tick = true; |
| 2642 | } |
| 2643 | } |
| 2644 | |
| 2645 | if (!group->blocked_queues) |
| 2646 | list_del_init(&group->wait_node); |
| 2647 | } |
| 2648 | mutex_unlock(&sched->lock); |
| 2649 | |
| 2650 | if (immediate_tick) |
| 2651 | sched_queue_delayed_work(sched, tick, 0); |
| 2652 | } |
| 2653 | |
| 2654 | static void group_schedule_locked(struct panthor_group *group, u32 queue_mask) |
| 2655 | { |
| 2656 | struct panthor_device *ptdev = group->ptdev; |
| 2657 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 2658 | struct list_head *queue = &sched->groups.runnable[group->priority]; |
| 2659 | u64 delay_jiffies = 0; |
| 2660 | bool was_idle; |
| 2661 | u64 now; |
| 2662 | |
| 2663 | if (!group_can_run(group)) |
| 2664 | return; |
| 2665 | |
| 2666 | /* All updated queues are blocked, no need to wake up the scheduler. */ |
| 2667 | if ((queue_mask & group->blocked_queues) == queue_mask) |
| 2668 | return; |
| 2669 | |
| 2670 | was_idle = group_is_idle(group); |
| 2671 | group->idle_queues &= ~queue_mask; |
| 2672 | |
| 2673 | /* Don't mess up with the lists if we're in a middle of a reset. */ |
| 2674 | if (atomic_read(&sched->reset.in_progress)) |
| 2675 | return; |
| 2676 | |
| 2677 | if (was_idle && !group_is_idle(group)) |
| 2678 | list_move_tail(&group->run_node, queue); |
| 2679 | |
| 2680 | /* RT groups are preemptive. */ |
| 2681 | if (group->priority == PANTHOR_CSG_PRIORITY_RT) { |
| 2682 | sched_queue_delayed_work(sched, tick, 0); |
| 2683 | return; |
| 2684 | } |
| 2685 | |
| 2686 | /* Some groups might be idle, force an immediate tick to |
| 2687 | * re-evaluate. |
| 2688 | */ |
| 2689 | if (sched->might_have_idle_groups) { |
| 2690 | sched_queue_delayed_work(sched, tick, 0); |
| 2691 | return; |
| 2692 | } |
| 2693 | |
| 2694 | /* Scheduler is ticking, nothing to do. */ |
| 2695 | if (sched->resched_target != U64_MAX) { |
| 2696 | /* If there are free slots, force an immediate tick. */ |
| 2697 | if (sched->used_csg_slot_count < sched->csg_slot_count) |
| 2698 | sched_queue_delayed_work(sched, tick, 0); |
| 2699 | |
| 2700 | return; |
| 2701 | } |
| 2702 | |
| 2703 | /* Scheduler tick was off, recalculate the resched_target based on the |
| 2704 | * last tick event, and queue the scheduler work. |
| 2705 | */ |
| 2706 | now = get_jiffies_64(); |
| 2707 | sched->resched_target = sched->last_tick + sched->tick_period; |
| 2708 | if (sched->used_csg_slot_count == sched->csg_slot_count && |
| 2709 | time_before64(now, sched->resched_target)) |
| 2710 | delay_jiffies = min_t(unsigned long, sched->resched_target - now, ULONG_MAX); |
| 2711 | |
| 2712 | sched_queue_delayed_work(sched, tick, delay_jiffies); |
| 2713 | } |
| 2714 | |
| 2715 | static void queue_stop(struct panthor_queue *queue, |
| 2716 | struct panthor_job *bad_job) |
| 2717 | { |
| 2718 | disable_delayed_work_sync(&queue->timeout.work); |
| 2719 | drm_sched_stop(&queue->scheduler, bad_job ? &bad_job->base : NULL); |
| 2720 | } |
| 2721 | |
| 2722 | static void queue_start(struct panthor_queue *queue) |
| 2723 | { |
| 2724 | struct panthor_job *job; |
| 2725 | |
| 2726 | /* Re-assign the parent fences. */ |
| 2727 | list_for_each_entry(job, &queue->scheduler.pending_list, base.list) |
| 2728 | job->base.s_fence->parent = dma_fence_get(job->done_fence); |
| 2729 | |
| 2730 | enable_delayed_work(&queue->timeout.work); |
| 2731 | drm_sched_start(&queue->scheduler, 0); |
| 2732 | } |
| 2733 | |
| 2734 | static void panthor_group_stop(struct panthor_group *group) |
| 2735 | { |
| 2736 | struct panthor_scheduler *sched = group->ptdev->scheduler; |
| 2737 | |
| 2738 | lockdep_assert_held(&sched->reset.lock); |
| 2739 | |
| 2740 | for (u32 i = 0; i < group->queue_count; i++) |
| 2741 | queue_stop(group->queues[i], NULL); |
| 2742 | |
| 2743 | group_get(group); |
| 2744 | list_move_tail(&group->run_node, &sched->reset.stopped_groups); |
| 2745 | } |
| 2746 | |
| 2747 | static void panthor_group_start(struct panthor_group *group) |
| 2748 | { |
| 2749 | struct panthor_scheduler *sched = group->ptdev->scheduler; |
| 2750 | |
| 2751 | lockdep_assert_held(&group->ptdev->scheduler->reset.lock); |
| 2752 | |
| 2753 | for (u32 i = 0; i < group->queue_count; i++) |
| 2754 | queue_start(group->queues[i]); |
| 2755 | |
| 2756 | if (group_can_run(group)) { |
| 2757 | list_move_tail(&group->run_node, |
| 2758 | group_is_idle(group) ? |
| 2759 | &sched->groups.idle[group->priority] : |
| 2760 | &sched->groups.runnable[group->priority]); |
| 2761 | } else { |
| 2762 | list_del_init(&group->run_node); |
| 2763 | list_del_init(&group->wait_node); |
| 2764 | group_queue_work(group, term); |
| 2765 | } |
| 2766 | |
| 2767 | group_put(group); |
| 2768 | } |
| 2769 | |
| 2770 | static void panthor_sched_immediate_tick(struct panthor_device *ptdev) |
| 2771 | { |
| 2772 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 2773 | |
| 2774 | sched_queue_delayed_work(sched, tick, 0); |
| 2775 | } |
| 2776 | |
| 2777 | /** |
| 2778 | * panthor_sched_report_mmu_fault() - Report MMU faults to the scheduler. |
| | * @ptdev: Device. |
| 2779 | */ |
| 2780 | void panthor_sched_report_mmu_fault(struct panthor_device *ptdev) |
| 2781 | { |
| 2782 | /* Force a tick to immediately kill faulty groups. */ |
| 2783 | if (ptdev->scheduler) |
| 2784 | panthor_sched_immediate_tick(ptdev); |
| 2785 | } |
| 2786 | |
| 2787 | void panthor_sched_resume(struct panthor_device *ptdev) |
| 2788 | { |
| 2789 | /* Force a tick to re-evaluate after a resume. */ |
| 2790 | panthor_sched_immediate_tick(ptdev); |
| 2791 | } |
| 2792 | |
| 2793 | void panthor_sched_suspend(struct panthor_device *ptdev) |
| 2794 | { |
| 2795 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 2796 | struct panthor_csg_slots_upd_ctx upd_ctx; |
| 2797 | u32 suspended_slots; |
| 2798 | u32 i; |
| 2799 | |
| 2800 | mutex_lock(&sched->lock); |
| 2801 | csgs_upd_ctx_init(&upd_ctx); |
| 2802 | for (i = 0; i < sched->csg_slot_count; i++) { |
| 2803 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; |
| 2804 | |
| 2805 | if (csg_slot->group) { |
| 2806 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, i, |
| 2807 | group_can_run(csg_slot->group) ? |
| 2808 | CSG_STATE_SUSPEND : CSG_STATE_TERMINATE, |
| 2809 | CSG_STATE_MASK); |
| 2810 | } |
| 2811 | } |
| 2812 | |
| 2813 | suspended_slots = upd_ctx.update_mask; |
| 2814 | |
| 2815 | csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); |
| 2816 | suspended_slots &= ~upd_ctx.timedout_mask; |
| 2817 | |
| 2818 | if (upd_ctx.timedout_mask) { |
| 2819 | u32 slot_mask = upd_ctx.timedout_mask; |
| 2820 | |
| 2821 | drm_err(&ptdev->base, "CSG suspend failed, escalating to termination" ); |
| 2822 | csgs_upd_ctx_init(ctx: &upd_ctx); |
| 2823 | while (slot_mask) { |
| 2824 | u32 csg_id = ffs(slot_mask) - 1; |
| 2825 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 2826 | |
| 2827 | /* If the group was still usable before that point, we consider |
| 2828 | * it innocent. |
| 2829 | */ |
| 2830 | if (group_can_run(csg_slot->group)) |
| 2831 | csg_slot->group->innocent = true; |
| 2832 | |
| 2833 | /* We consider group suspension failures as fatal and flag the |
| 2834 | * group as unusable by setting timedout=true. |
| 2835 | */ |
| 2836 | csg_slot->group->timedout = true; |
| 2837 | |
| 2838 | csgs_upd_ctx_queue_reqs(ptdev, &upd_ctx, csg_id, |
| 2839 | CSG_STATE_TERMINATE, |
| 2840 | CSG_STATE_MASK); |
| 2841 | slot_mask &= ~BIT(csg_id); |
| 2842 | } |
| 2843 | |
| 2844 | csgs_upd_ctx_apply_locked(ptdev, &upd_ctx); |
| 2845 | |
| 2846 | slot_mask = upd_ctx.timedout_mask; |
| 2847 | while (slot_mask) { |
| 2848 | u32 csg_id = ffs(slot_mask) - 1; |
| 2849 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 2850 | struct panthor_group *group = csg_slot->group; |
| 2851 | |
| 2852 | /* Terminate command timed out, but the soft-reset will |
| 2853 | * automatically terminate all active groups, so let's |
| 2854 | * force the state to halted here. |
| 2855 | */ |
| 2856 | if (group->state != PANTHOR_CS_GROUP_TERMINATED) { |
| 2857 | group->state = PANTHOR_CS_GROUP_TERMINATED; |
| 2858 | |
| 2859 | /* Reset the queue slots manually if the termination |
| 2860 | * request failed. |
| 2861 | */ |
| 2862 | for (i = 0; i < group->queue_count; i++) { |
| 2863 | if (group->queues[i]) |
| 2864 | cs_slot_reset_locked(ptdev, csg_id, i); |
| 2865 | } |
| 2866 | } |
| 2867 | slot_mask &= ~BIT(csg_id); |
| 2868 | } |
| 2869 | } |
| 2870 | |
| 2871 | /* Flush L2 and LSC caches to make sure suspend state is up-to-date. |
| 2872 | * If the flush fails, flag all queues for termination. |
| 2873 | */ |
| 2874 | if (suspended_slots) { |
| 2875 | bool flush_caches_failed = false; |
| 2876 | u32 slot_mask = suspended_slots; |
| 2877 | |
| 2878 | if (panthor_gpu_flush_caches(ptdev, CACHE_CLEAN, CACHE_CLEAN, 0)) |
| 2879 | flush_caches_failed = true; |
| 2880 | |
| 2881 | while (slot_mask) { |
| 2882 | u32 csg_id = ffs(slot_mask) - 1; |
| 2883 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[csg_id]; |
| 2884 | |
| 2885 | if (flush_caches_failed) |
| 2886 | csg_slot->group->state = PANTHOR_CS_GROUP_TERMINATED; |
| 2887 | else |
| 2888 | csg_slot_sync_update_locked(ptdev, csg_id); |
| 2889 | |
| 2890 | slot_mask &= ~BIT(csg_id); |
| 2891 | } |
| 2892 | } |
| 2893 | |
| 2894 | for (i = 0; i < sched->csg_slot_count; i++) { |
| 2895 | struct panthor_csg_slot *csg_slot = &sched->csg_slots[i]; |
| 2896 | struct panthor_group *group = csg_slot->group; |
| 2897 | |
| 2898 | if (!group) |
| 2899 | continue; |
| 2900 | |
| 2901 | group_get(group); |
| 2902 | |
| 2903 | if (group->csg_id >= 0) |
| 2904 | sched_process_csg_irq_locked(ptdev, group->csg_id); |
| 2905 | |
| 2906 | group_unbind_locked(group); |
| 2907 | |
| 2908 | drm_WARN_ON(&group->ptdev->base, !list_empty(&group->run_node)); |
| 2909 | |
| 2910 | if (group_can_run(group)) { |
| 2911 | list_add(&group->run_node, |
| 2912 | &sched->groups.idle[group->priority]); |
| 2913 | } else { |
| 2914 | /* We don't bother stopping the scheduler if the group is |
| 2915 | * faulty, the group termination work will finish the job. |
| 2916 | */ |
| 2917 | list_del_init(&group->wait_node); |
| 2918 | group_queue_work(group, term); |
| 2919 | } |
| 2920 | group_put(group); |
| 2921 | } |
| 2922 | mutex_unlock(&sched->lock); |
| 2923 | } |
| 2924 | |
| 2925 | void panthor_sched_pre_reset(struct panthor_device *ptdev) |
| 2926 | { |
| 2927 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 2928 | struct panthor_group *group, *group_tmp; |
| 2929 | u32 i; |
| 2930 | |
| 2931 | mutex_lock(&sched->reset.lock); |
| 2932 | atomic_set(&sched->reset.in_progress, true); |
| 2933 | |
| 2934 | /* Cancel all scheduler works. Once this is done, these works can't be |
| 2935 | * scheduled again until the reset operation is complete. |
| 2936 | */ |
| 2937 | cancel_work_sync(&sched->sync_upd_work); |
| 2938 | cancel_delayed_work_sync(&sched->tick_work); |
| 2939 | |
| 2940 | panthor_sched_suspend(ptdev); |
| 2941 | |
| 2942 | /* Stop all groups that might still accept jobs, so we don't get passed |
| 2943 | * new jobs while we're resetting. |
| 2944 | */ |
| 2945 | for (i = 0; i < ARRAY_SIZE(sched->groups.runnable); i++) { |
| 2946 | /* All groups should be in the idle lists. */ |
| 2947 | drm_WARN_ON(&ptdev->base, !list_empty(&sched->groups.runnable[i])); |
| 2948 | list_for_each_entry_safe(group, group_tmp, &sched->groups.runnable[i], run_node) |
| 2949 | panthor_group_stop(group); |
| 2950 | } |
| 2951 | |
| 2952 | for (i = 0; i < ARRAY_SIZE(sched->groups.idle); i++) { |
| 2953 | list_for_each_entry_safe(group, group_tmp, &sched->groups.idle[i], run_node) |
| 2954 | panthor_group_stop(group); |
| 2955 | } |
| 2956 | |
| 2957 | mutex_unlock(&sched->reset.lock); |
| 2958 | } |
| 2959 | |
| 2960 | void panthor_sched_post_reset(struct panthor_device *ptdev, bool reset_failed) |
| 2961 | { |
| 2962 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 2963 | struct panthor_group *group, *group_tmp; |
| 2964 | |
| 2965 | mutex_lock(&sched->reset.lock); |
| 2966 | |
| 2967 | list_for_each_entry_safe(group, group_tmp, &sched->reset.stopped_groups, run_node) { |
| 2968 | /* Consider all previously running groups as terminated if the |
| 2969 | * reset failed. |
| 2970 | */ |
| 2971 | if (reset_failed) |
| 2972 | group->state = PANTHOR_CS_GROUP_TERMINATED; |
| 2973 | |
| 2974 | panthor_group_start(group); |
| 2975 | } |
| 2976 | |
| 2977 | /* We're done resetting the GPU, clear the reset.in_progress bit so we can |
| 2978 | * kick the scheduler. |
| 2979 | */ |
| 2980 | atomic_set(&sched->reset.in_progress, false); |
| 2981 | mutex_unlock(&sched->reset.lock); |
| 2982 | |
| 2983 | /* No need to queue a tick and update syncs if the reset failed. */ |
| 2984 | if (!reset_failed) { |
| 2985 | sched_queue_delayed_work(sched, tick, 0); |
| 2986 | sched_queue_work(sched, sync_upd); |
| 2987 | } |
| 2988 | } |
| 2989 | |
| 2990 | static void update_fdinfo_stats(struct panthor_job *job) |
| 2991 | { |
| 2992 | struct panthor_group *group = job->group; |
| 2993 | struct panthor_queue *queue = group->queues[job->queue_idx]; |
| 2994 | struct panthor_gpu_usage *fdinfo = &group->fdinfo.data; |
| 2995 | struct panthor_job_profiling_data *slots = queue->profiling.slots->kmap; |
| 2996 | struct panthor_job_profiling_data *data = &slots[job->profiling.slot]; |
| 2997 | |
| 2998 | scoped_guard(spinlock, &group->fdinfo.lock) { |
| 2999 | if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_CYCLES) |
| 3000 | fdinfo->cycles += data->cycles.after - data->cycles.before; |
| 3001 | if (job->profiling.mask & PANTHOR_DEVICE_PROFILING_TIMESTAMP) |
| 3002 | fdinfo->time += data->time.after - data->time.before; |
| 3003 | } |
| 3004 | } |
| 3005 | |
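|  | /* Fold each group's fdinfo sample counters into the per-file stats and
|  | * reset them, so a sample is only accounted once across fdinfo reads.
|  | */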
| 3006 | void panthor_fdinfo_gather_group_samples(struct panthor_file *pfile) |
| 3007 | { |
| 3008 | struct panthor_group_pool *gpool = pfile->groups; |
| 3009 | struct panthor_group *group; |
| 3010 | unsigned long i; |
| 3011 | |
| 3012 | if (IS_ERR_OR_NULL(gpool))
| 3013 | return; |
| 3014 | |
| 3015 | xa_lock(&gpool->xa); |
| 3016 | xa_for_each_marked(&gpool->xa, i, group, GROUP_REGISTERED) { |
| 3017 | guard(spinlock)(&group->fdinfo.lock);
| 3018 | pfile->stats.cycles += group->fdinfo.data.cycles; |
| 3019 | pfile->stats.time += group->fdinfo.data.time; |
| 3020 | group->fdinfo.data.cycles = 0; |
| 3021 | group->fdinfo.data.time = 0; |
| 3022 | } |
| 3023 | xa_unlock(&gpool->xa); |
| 3024 | } |
| 3025 | |
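|  | /* Walk the queue's in-flight job list and signal every job whose seqno has
|  | * been reached by the syncobj updated from the ring buffer (see the
|  | * SYNC_ADD64 instruction in prepare_job_instrs()). Returns true if the queue
|  | * made progress (a job completed, or nothing is pending), which the timeout
|  | * handler uses to filter out spurious timeouts.
|  | */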
| 3026 | static bool queue_check_job_completion(struct panthor_queue *queue) |
| 3027 | { |
| 3028 | struct panthor_syncobj_64b *syncobj = NULL; |
| 3029 | struct panthor_job *job, *job_tmp; |
| 3030 | bool cookie, progress = false; |
| 3031 | LIST_HEAD(done_jobs); |
| 3032 | |
| 3033 | cookie = dma_fence_begin_signalling(); |
| 3034 | spin_lock(&queue->fence_ctx.lock);
| 3035 | list_for_each_entry_safe(job, job_tmp, &queue->fence_ctx.in_flight_jobs, node) { |
| 3036 | if (!syncobj) { |
| 3037 | struct panthor_group *group = job->group; |
| 3038 | |
| 3039 | syncobj = group->syncobjs->kmap + |
| 3040 | (job->queue_idx * sizeof(*syncobj)); |
| 3041 | } |
| 3042 | |
| 3043 | if (syncobj->seqno < job->done_fence->seqno) |
| 3044 | break; |
| 3045 | |
| 3046 | list_move_tail(&job->node, &done_jobs);
| 3047 | dma_fence_signal_locked(job->done_fence);
| 3048 | } |
| 3049 | |
| 3050 | if (list_empty(&queue->fence_ctx.in_flight_jobs)) {
| 3051 | /* If we have no job left, we cancel the timer, and reset remaining |
| 3052 | * time to its default so it can be restarted next time |
| 3053 | * queue_resume_timeout() is called. |
| 3054 | */ |
| 3055 | queue_suspend_timeout_locked(queue); |
| 3056 | |
| 3057 | /* If there's no job pending, we consider it progress to avoid a |
| 3058 | * spurious timeout if the timeout handler and the sync update |
| 3059 | * handler raced. |
| 3060 | */ |
| 3061 | progress = true; |
| 3062 | } else if (!list_empty(&done_jobs)) {
| 3063 | queue_reset_timeout_locked(queue); |
| 3064 | progress = true; |
| 3065 | } |
| 3066 | spin_unlock(&queue->fence_ctx.lock);
| 3067 | dma_fence_end_signalling(cookie); |
| 3068 | |
| 3069 | list_for_each_entry_safe(job, job_tmp, &done_jobs, node) { |
| 3070 | if (job->profiling.mask) |
| 3071 | update_fdinfo_stats(job); |
| 3072 | list_del_init(&job->node);
| 3073 | panthor_job_put(&job->base);
| 3074 | } |
| 3075 | |
| 3076 | return progress; |
| 3077 | } |
| 3078 | |
| 3079 | static void group_sync_upd_work(struct work_struct *work) |
| 3080 | { |
| 3081 | struct panthor_group *group = |
| 3082 | container_of(work, struct panthor_group, sync_upd_work); |
| 3083 | u32 queue_idx; |
| 3084 | bool cookie; |
| 3085 | |
| 3086 | cookie = dma_fence_begin_signalling(); |
| 3087 | for (queue_idx = 0; queue_idx < group->queue_count; queue_idx++) { |
| 3088 | struct panthor_queue *queue = group->queues[queue_idx]; |
| 3089 | |
| 3090 | if (!queue) |
| 3091 | continue; |
| 3092 | |
| 3093 | queue_check_job_completion(queue); |
| 3094 | } |
| 3095 | dma_fence_end_signalling(cookie); |
| 3096 | |
| 3097 | group_put(group); |
| 3098 | } |
| 3099 | |
| 3100 | struct panthor_job_ringbuf_instrs { |
| 3101 | u64 buffer[MAX_INSTRS_PER_JOB]; |
| 3102 | u32 count; |
| 3103 | }; |
| 3104 | |
| 3105 | struct panthor_job_instr { |
| 3106 | u32 profile_mask; |
| 3107 | u64 instr; |
| 3108 | }; |
| 3109 | |
| 3110 | #define JOB_INSTR(__prof, __instr) \ |
| 3111 | { \ |
| 3112 | .profile_mask = __prof, \ |
| 3113 | .instr = __instr, \ |
| 3114 | } |
| 3115 | |
| 3116 | static void |
| 3117 | copy_instrs_to_ringbuf(struct panthor_queue *queue, |
| 3118 | struct panthor_job *job, |
| 3119 | struct panthor_job_ringbuf_instrs *instrs) |
| 3120 | { |
| 3121 | u64 ringbuf_size = panthor_kernel_bo_size(queue->ringbuf);
| 3122 | u64 start = job->ringbuf.start & (ringbuf_size - 1); |
| 3123 | u64 size, written; |
| 3124 | |
| 3125 | /* |
| 3126 | * We need to write a whole slot, including any trailing zeroes |
| 3127 | * that may come at the end of it. The padding in instrs->buffer has
| 3128 | * already been zeroed by prepare_job_instrs(), so no extra padding is needed here.
| 3129 | */ |
| 3130 | instrs->count = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE); |
| 3131 | size = instrs->count * sizeof(u64); |
| 3132 | WARN_ON(size > ringbuf_size); |
| 3133 | written = min(ringbuf_size - start, size); |
| 3134 | |
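|  | /* The ring buffer may wrap in the middle of the slot: copy what fits at the
|  | * end of the buffer first, then the remainder at the start. E.g. with a 4K
|  | * ring buffer, start = 4032 and size = 128, the first 64 bytes land at
|  | * offset 4032 and the remaining 64 bytes at offset 0.
|  | */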
| 3135 | memcpy(queue->ringbuf->kmap + start, instrs->buffer, written); |
| 3136 | |
| 3137 | if (written < size) |
| 3138 | memcpy(queue->ringbuf->kmap, |
| 3139 | &instrs->buffer[written / sizeof(u64)], |
| 3140 | size - written); |
| 3141 | } |
| 3142 | |
| 3143 | struct panthor_job_cs_params { |
| 3144 | u32 profile_mask; |
| 3145 | u64 addr_reg; u64 val_reg; |
| 3146 | u64 cycle_reg; u64 time_reg; |
| 3147 | u64 sync_addr; u64 times_addr; |
| 3148 | u64 cs_start; u64 cs_size; |
| 3149 | u32 last_flush; u32 waitall_mask; |
| 3150 | }; |
| 3151 | |
| 3152 | static void |
| 3153 | get_job_cs_params(struct panthor_job *job, struct panthor_job_cs_params *params) |
| 3154 | { |
| 3155 | struct panthor_group *group = job->group; |
| 3156 | struct panthor_queue *queue = group->queues[job->queue_idx]; |
| 3157 | struct panthor_device *ptdev = group->ptdev; |
| 3158 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 3159 | |
| 3160 | params->addr_reg = ptdev->csif_info.cs_reg_count - |
| 3161 | ptdev->csif_info.unpreserved_cs_reg_count; |
| 3162 | params->val_reg = params->addr_reg + 2; |
| 3163 | params->cycle_reg = params->addr_reg; |
| 3164 | params->time_reg = params->val_reg; |
| 3165 | |
| 3166 | params->sync_addr = panthor_kernel_bo_gpuva(group->syncobjs) +
| 3167 | job->queue_idx * sizeof(struct panthor_syncobj_64b);
| 3168 | params->times_addr = panthor_kernel_bo_gpuva(queue->profiling.slots) +
| 3169 | (job->profiling.slot * sizeof(struct panthor_job_profiling_data)); |
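|  | /* Wait on every scoreboard entry before the final sync update: with, say,
|  | * 8 scoreboard slots this is GENMASK(7, 0) == 0xff.
|  | */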
| 3170 | params->waitall_mask = GENMASK(sched->sb_slot_count - 1, 0); |
| 3171 | |
| 3172 | params->cs_start = job->call_info.start; |
| 3173 | params->cs_size = job->call_info.size; |
| 3174 | params->last_flush = job->call_info.latest_flush; |
| 3175 | |
| 3176 | params->profile_mask = job->profiling.mask; |
| 3177 | } |
| 3178 | |
| 3179 | #define JOB_INSTR_ALWAYS(instr) \ |
| 3180 | JOB_INSTR(PANTHOR_DEVICE_PROFILING_DISABLED, (instr)) |
| 3181 | #define JOB_INSTR_TIMESTAMP(instr) \ |
| 3182 | JOB_INSTR(PANTHOR_DEVICE_PROFILING_TIMESTAMP, (instr)) |
| 3183 | #define JOB_INSTR_CYCLES(instr) \ |
| 3184 | JOB_INSTR(PANTHOR_DEVICE_PROFILING_CYCLES, (instr)) |
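|  | /* JOB_INSTR_ALWAYS() entries are unconditionally copied to the ring buffer;
|  | * the TIMESTAMP/CYCLES variants are only emitted when the matching bit is
|  | * set in the job's profiling mask (see prepare_job_instrs()).
|  | */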
| 3185 | |
| 3186 | static void |
| 3187 | prepare_job_instrs(const struct panthor_job_cs_params *params, |
| 3188 | struct panthor_job_ringbuf_instrs *instrs) |
| 3189 | { |
| 3190 | const struct panthor_job_instr instr_seq[] = { |
| 3191 | /* MOV32 rX+2, cs.latest_flush */ |
| 3192 | JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->last_flush), |
| 3193 | /* FLUSH_CACHE2.clean_inv_all.no_wait.signal(0) rX+2 */ |
| 3194 | JOB_INSTR_ALWAYS((36ull << 56) | (0ull << 48) | (params->val_reg << 40) | |
| 3195 | (0 << 16) | 0x233), |
| 3196 | /* MOV48 rX:rX+1, cycles_offset */ |
| 3197 | JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) | |
| 3198 | (params->times_addr + |
| 3199 | offsetof(struct panthor_job_profiling_data, cycles.before))), |
| 3200 | /* STORE_STATE cycles */ |
| 3201 | JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)), |
| 3202 | /* MOV48 rX:rX+1, time_offset */ |
| 3203 | JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) | |
| 3204 | (params->times_addr + |
| 3205 | offsetof(struct panthor_job_profiling_data, time.before))), |
| 3206 | /* STORE_STATE timer */ |
| 3207 | JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)), |
| 3208 | /* MOV48 rX:rX+1, cs.start */ |
| 3209 | JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->cs_start), |
| 3210 | /* MOV32 rX+2, cs.size */ |
| 3211 | JOB_INSTR_ALWAYS((2ull << 56) | (params->val_reg << 48) | params->cs_size), |
| 3212 | /* WAIT(0) => waits for FLUSH_CACHE2 instruction */ |
| 3213 | JOB_INSTR_ALWAYS((3ull << 56) | (1 << 16)), |
| 3214 | /* CALL rX:rX+1, rX+2 */ |
| 3215 | JOB_INSTR_ALWAYS((32ull << 56) | (params->addr_reg << 40) | |
| 3216 | (params->val_reg << 32)), |
| 3217 | /* MOV48 rX:rX+1, cycles_offset */ |
| 3218 | JOB_INSTR_CYCLES((1ull << 56) | (params->cycle_reg << 48) | |
| 3219 | (params->times_addr + |
| 3220 | offsetof(struct panthor_job_profiling_data, cycles.after))), |
| 3221 | /* STORE_STATE cycles */ |
| 3222 | JOB_INSTR_CYCLES((40ull << 56) | (params->cycle_reg << 40) | (1ll << 32)), |
| 3223 | /* MOV48 rX:rX+1, time_offset */ |
| 3224 | JOB_INSTR_TIMESTAMP((1ull << 56) | (params->time_reg << 48) | |
| 3225 | (params->times_addr + |
| 3226 | offsetof(struct panthor_job_profiling_data, time.after))), |
| 3227 | /* STORE_STATE timer */ |
| 3228 | JOB_INSTR_TIMESTAMP((40ull << 56) | (params->time_reg << 40) | (0ll << 32)), |
| 3229 | /* MOV48 rX:rX+1, sync_addr */ |
| 3230 | JOB_INSTR_ALWAYS((1ull << 56) | (params->addr_reg << 48) | params->sync_addr), |
| 3231 | /* MOV48 rX+2, #1 */ |
| 3232 | JOB_INSTR_ALWAYS((1ull << 56) | (params->val_reg << 48) | 1), |
| 3233 | /* WAIT(all) */ |
| 3234 | JOB_INSTR_ALWAYS((3ull << 56) | (params->waitall_mask << 16)), |
| 3235 | /* SYNC_ADD64.system_scope.propagate_err.nowait rX:rX+1, rX+2 */
| 3236 | JOB_INSTR_ALWAYS((51ull << 56) | (0ull << 48) | (params->addr_reg << 40) | |
| 3237 | (params->val_reg << 32) | (0 << 16) | 1), |
| 3238 | /* ERROR_BARRIER, so we can recover from faults at job boundaries. */ |
| 3239 | JOB_INSTR_ALWAYS((47ull << 56)), |
| 3240 | }; |
| 3241 | u32 pad; |
| 3242 | |
| 3243 | instrs->count = 0; |
| 3244 | |
| 3245 | /* NEED to be cacheline aligned to please the prefetcher. */ |
| 3246 | static_assert(sizeof(instrs->buffer) % 64 == 0, |
| 3247 | "panthor_job_ringbuf_instrs::buffer is not aligned on a cacheline");
| 3248 | |
| 3249 | /* Make sure we have enough storage to store the whole sequence. */ |
| 3250 | static_assert(ALIGN(ARRAY_SIZE(instr_seq), NUM_INSTRS_PER_CACHE_LINE) == |
| 3251 | ARRAY_SIZE(instrs->buffer), |
| 3252 | "instr_seq vs panthor_job_ringbuf_instrs::buffer size mismatch");
| 3253 | |
| 3254 | for (u32 i = 0; i < ARRAY_SIZE(instr_seq); i++) { |
| 3255 | /* If the profile mask of this instruction is not enabled, skip it. */ |
| 3256 | if (instr_seq[i].profile_mask && |
| 3257 | !(instr_seq[i].profile_mask & params->profile_mask)) |
| 3258 | continue; |
| 3259 | |
| 3260 | instrs->buffer[instrs->count++] = instr_seq[i].instr; |
| 3261 | } |
| 3262 | |
| 3263 | pad = ALIGN(instrs->count, NUM_INSTRS_PER_CACHE_LINE); |
| 3264 | memset(&instrs->buffer[instrs->count], 0, |
| 3265 | (pad - instrs->count) * sizeof(instrs->buffer[0])); |
| 3266 | instrs->count = pad; |
| 3267 | } |
| 3268 | |
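|  | /* A job's scheduler credit cost is the number of u64 ring buffer slots it
|  | * occupies. Compute it by preparing a throwaway instruction sequence for the
|  | * given profiling mask and counting the (cacheline-padded) instructions.
|  | */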
| 3269 | static u32 calc_job_credits(u32 profile_mask) |
| 3270 | { |
| 3271 | struct panthor_job_ringbuf_instrs instrs; |
| 3272 | struct panthor_job_cs_params params = { |
| 3273 | .profile_mask = profile_mask, |
| 3274 | }; |
| 3275 | |
| 3276 | prepare_job_instrs(&params, &instrs);
| 3277 | return instrs.count; |
| 3278 | } |
| 3279 | |
| 3280 | static struct dma_fence * |
| 3281 | queue_run_job(struct drm_sched_job *sched_job) |
| 3282 | { |
| 3283 | struct panthor_job *job = container_of(sched_job, struct panthor_job, base); |
| 3284 | struct panthor_group *group = job->group; |
| 3285 | struct panthor_queue *queue = group->queues[job->queue_idx]; |
| 3286 | struct panthor_device *ptdev = group->ptdev; |
| 3287 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 3288 | struct panthor_job_ringbuf_instrs instrs; |
| 3289 | struct panthor_job_cs_params cs_params; |
| 3290 | struct dma_fence *done_fence; |
| 3291 | int ret; |
| 3292 | |
| 3293 | /* Stream size is zero, nothing to do except making sure all previously |
| 3294 | * submitted jobs are done before we signal the |
| 3295 | * drm_sched_job::s_fence::finished fence. |
| 3296 | */ |
| 3297 | if (!job->call_info.size) { |
| 3298 | job->done_fence = dma_fence_get(queue->fence_ctx.last_fence);
| 3299 | return dma_fence_get(job->done_fence);
| 3300 | } |
| 3301 | |
| 3302 | ret = panthor_device_resume_and_get(ptdev); |
| 3303 | if (drm_WARN_ON(&ptdev->base, ret)) |
| 3304 | return ERR_PTR(ret);
| 3305 | |
| 3306 | mutex_lock(&sched->lock); |
| 3307 | if (!group_can_run(group)) { |
| 3308 | done_fence = ERR_PTR(-ECANCELED);
| 3309 | goto out_unlock; |
| 3310 | } |
| 3311 | |
| 3312 | dma_fence_init(job->done_fence,
| 3313 | &panthor_queue_fence_ops,
| 3314 | &queue->fence_ctx.lock,
| 3315 | queue->fence_ctx.id,
| 3316 | atomic64_inc_return(&queue->fence_ctx.seqno));
| 3317 | |
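|  | /* Grab the next profiling slot; the slot index simply wraps around the
|  | * fixed-size set of profiling slots allocated for this queue.
|  | */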
| 3318 | job->profiling.slot = queue->profiling.seqno++; |
| 3319 | if (queue->profiling.seqno == queue->profiling.slot_count) |
| 3320 | queue->profiling.seqno = 0; |
| 3321 | |
| 3322 | job->ringbuf.start = queue->iface.input->insert; |
| 3323 | |
| 3324 | get_job_cs_params(job, &cs_params);
| 3325 | prepare_job_instrs(&cs_params, &instrs);
| 3326 | copy_instrs_to_ringbuf(queue, job, &instrs);
| 3327 | |
| 3328 | job->ringbuf.end = job->ringbuf.start + (instrs.count * sizeof(u64)); |
| 3329 | |
| 3330 | panthor_job_get(&job->base);
| 3331 | spin_lock(&queue->fence_ctx.lock);
| 3332 | list_add_tail(&job->node, &queue->fence_ctx.in_flight_jobs);
| 3333 | spin_unlock(&queue->fence_ctx.lock);
| 3334 | |
| 3335 | /* Make sure the ring buffer is updated before the INSERT |
| 3336 | * register. |
| 3337 | */ |
| 3338 | wmb(); |
| 3339 | |
| 3340 | queue->iface.input->extract = queue->iface.output->extract; |
| 3341 | queue->iface.input->insert = job->ringbuf.end; |
| 3342 | |
| 3343 | if (group->csg_id < 0) { |
| 3344 | group_schedule_locked(group, BIT(job->queue_idx)); |
| 3345 | } else { |
| 3346 | gpu_write(ptdev, CSF_DOORBELL(queue->doorbell_id), 1);
| 3347 | if (!sched->pm.has_ref &&
| 3348 | !(group->blocked_queues & BIT(job->queue_idx))) {
| 3349 | pm_runtime_get(ptdev->base.dev);
| 3350 | sched->pm.has_ref = true;
| 3351 | }
| 3352 | queue_resume_timeout(queue);
| 3353 | panthor_devfreq_record_busy(sched->ptdev);
| 3354 | } |
| 3355 | |
| 3356 | /* Update the last fence. */ |
| 3357 | dma_fence_put(queue->fence_ctx.last_fence);
| 3358 | queue->fence_ctx.last_fence = dma_fence_get(job->done_fence);
| 3359 | |
| 3360 | done_fence = dma_fence_get(job->done_fence);
| 3361 | |
| 3362 | out_unlock: |
| 3363 | mutex_unlock(&sched->lock);
| 3364 | pm_runtime_mark_last_busy(ptdev->base.dev);
| 3365 | pm_runtime_put_autosuspend(ptdev->base.dev);
| 3366 | |
| 3367 | return done_fence; |
| 3368 | } |
| 3369 | |
| 3370 | static enum drm_gpu_sched_stat |
| 3371 | queue_timedout_job(struct drm_sched_job *sched_job) |
| 3372 | { |
| 3373 | struct panthor_job *job = container_of(sched_job, struct panthor_job, base); |
| 3374 | struct panthor_group *group = job->group; |
| 3375 | struct panthor_device *ptdev = group->ptdev; |
| 3376 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 3377 | struct panthor_queue *queue = group->queues[job->queue_idx]; |
| 3378 | |
| 3379 | drm_warn(&ptdev->base, "job timeout: pid=%d, comm=%s, seqno=%llu\n",
| 3380 | group->task_info.pid, group->task_info.comm, job->done_fence->seqno); |
| 3381 | |
| 3382 | drm_WARN_ON(&ptdev->base, atomic_read(&sched->reset.in_progress)); |
| 3383 | |
| 3384 | queue_stop(queue, job);
| 3385 | |
| 3386 | mutex_lock(&sched->lock); |
| 3387 | group->timedout = true; |
| 3388 | if (group->csg_id >= 0) { |
| 3389 | sched_queue_delayed_work(ptdev->scheduler, tick, 0); |
| 3390 | } else { |
| 3391 | /* Remove from the run queues, so the scheduler can't |
| 3392 | * pick the group on the next tick. |
| 3393 | */ |
| 3394 | list_del_init(&group->run_node);
| 3395 | list_del_init(&group->wait_node);
| 3396 | |
| 3397 | group_queue_work(group, term); |
| 3398 | } |
| 3399 | mutex_unlock(&sched->lock);
| 3400 | |
| 3401 | queue_start(queue); |
| 3402 | return DRM_GPU_SCHED_STAT_RESET; |
| 3403 | } |
| 3404 | |
| 3405 | static void queue_free_job(struct drm_sched_job *sched_job) |
| 3406 | { |
| 3407 | drm_sched_job_cleanup(sched_job);
| 3408 | panthor_job_put(sched_job);
| 3409 | } |
| 3410 | |
| 3411 | static const struct drm_sched_backend_ops panthor_queue_sched_ops = { |
| 3412 | .run_job = queue_run_job, |
| 3413 | .timedout_job = queue_timedout_job, |
| 3414 | .free_job = queue_free_job, |
| 3415 | }; |
| 3416 | |
| 3417 | static u32 calc_profiling_ringbuf_num_slots(struct panthor_device *ptdev, |
| 3418 | u32 cs_ringbuf_size) |
| 3419 | { |
| 3420 | u32 min_profiled_job_instrs = U32_MAX; |
| 3421 | u32 last_flag = fls(PANTHOR_DEVICE_PROFILING_ALL);
| 3422 | |
| 3423 | /* |
| 3424 | * We want to calculate the minimum size of a profiled job's CS:
| 3425 | * because profiled jobs need additional instructions to sample
| 3426 | * performance metrics, they take up more room in the queue's
| 3427 | * ringbuffer, which in turn means fewer of them can be in flight
| 3428 | * at once. What we need is therefore the maximum number of
| 3429 | * profiling slots we should allocate, which matches the maximum
| 3430 | * number of profiled jobs we can place simultaneously in the
| 3431 | * queue's ring buffer.
| 3432 | * That has to be calculated separately for every single job profiling
| 3433 | * flag, but not when job profiling is disabled, since unprofiled
| 3434 | * jobs don't need to keep track of this at all.
| 3435 | */ |
| 3436 | for (u32 i = 0; i < last_flag; i++) { |
| 3437 | min_profiled_job_instrs = |
| 3438 | min(min_profiled_job_instrs, calc_job_credits(BIT(i))); |
| 3439 | } |
| 3440 | |
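|  | /* E.g. a 4K ring buffer with a 32-instruction (256-byte) minimum profiled
|  | * job would yield 4096 / 256 = 16 profiling slots.
|  | */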
| 3441 | return DIV_ROUND_UP(cs_ringbuf_size, min_profiled_job_instrs * sizeof(u64)); |
| 3442 | } |
| 3443 | |
| 3444 | static void queue_timeout_work(struct work_struct *work) |
| 3445 | { |
| 3446 | struct panthor_queue *queue = container_of(work, struct panthor_queue, |
| 3447 | timeout.work.work); |
| 3448 | bool progress; |
| 3449 | |
| 3450 | progress = queue_check_job_completion(queue); |
| 3451 | if (!progress) |
| 3452 | drm_sched_fault(&queue->scheduler);
| 3453 | } |
| 3454 | |
| 3455 | static struct panthor_queue * |
| 3456 | group_create_queue(struct panthor_group *group, |
| 3457 | const struct drm_panthor_queue_create *args, |
| 3458 | u64 drm_client_id, u32 gid, u32 qid) |
| 3459 | { |
| 3460 | struct drm_sched_init_args sched_args = { |
| 3461 | .ops = &panthor_queue_sched_ops, |
| 3462 | .submit_wq = group->ptdev->scheduler->wq, |
| 3463 | .num_rqs = 1, |
| 3464 | /* |
| 3465 | * The credit limit argument tells us the total number of |
| 3466 | * instructions across all CS slots in the ringbuffer, with |
| 3467 | * some jobs requiring twice as many as others, depending on |
| 3468 | * their profiling status. |
| 3469 | */ |
| 3470 | .credit_limit = args->ringbuf_size / sizeof(u64), |
| 3471 | .timeout = MAX_SCHEDULE_TIMEOUT, |
| 3472 | .timeout_wq = group->ptdev->reset.wq, |
| 3473 | .dev = group->ptdev->base.dev, |
| 3474 | }; |
| 3475 | struct drm_gpu_scheduler *drm_sched; |
| 3476 | struct panthor_queue *queue; |
| 3477 | int ret; |
| 3478 | |
| 3479 | if (args->pad[0] || args->pad[1] || args->pad[2]) |
| 3480 | return ERR_PTR(-EINVAL);
| 3481 | |
| 3482 | if (args->ringbuf_size < SZ_4K || args->ringbuf_size > SZ_64K || |
| 3483 | !is_power_of_2(args->ringbuf_size))
| 3484 | return ERR_PTR(-EINVAL);
| 3485 | |
| 3486 | if (args->priority > CSF_MAX_QUEUE_PRIO) |
| 3487 | return ERR_PTR(-EINVAL);
| 3488 | |
| 3489 | queue = kzalloc(sizeof(*queue), GFP_KERNEL); |
| 3490 | if (!queue) |
| 3491 | return ERR_PTR(-ENOMEM);
| 3492 | |
| 3493 | queue->timeout.remaining = msecs_to_jiffies(JOB_TIMEOUT_MS); |
| 3494 | INIT_DELAYED_WORK(&queue->timeout.work, queue_timeout_work); |
| 3495 | queue->fence_ctx.id = dma_fence_context_alloc(1);
| 3496 | spin_lock_init(&queue->fence_ctx.lock); |
| 3497 | INIT_LIST_HEAD(&queue->fence_ctx.in_flight_jobs);
| 3498 | |
| 3499 | queue->priority = args->priority; |
| 3500 | |
| 3501 | queue->ringbuf = panthor_kernel_bo_create(group->ptdev, group->vm,
| 3502 | args->ringbuf_size,
| 3503 | DRM_PANTHOR_BO_NO_MMAP,
| 3504 | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
| 3505 | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
| 3506 | PANTHOR_VM_KERNEL_AUTO_VA,
| 3507 | "CS ring buffer");
| 3508 | if (IS_ERR(queue->ringbuf)) {
| 3509 | ret = PTR_ERR(queue->ringbuf);
| 3510 | goto err_free_queue; |
| 3511 | } |
| 3512 | |
| 3513 | ret = panthor_kernel_bo_vmap(queue->ringbuf);
| 3514 | if (ret) |
| 3515 | goto err_free_queue; |
| 3516 | |
| 3517 | queue->iface.mem = panthor_fw_alloc_queue_iface_mem(group->ptdev,
| 3518 | &queue->iface.input,
| 3519 | &queue->iface.output,
| 3520 | &queue->iface.input_fw_va,
| 3521 | &queue->iface.output_fw_va);
| 3522 | if (IS_ERR(queue->iface.mem)) {
| 3523 | ret = PTR_ERR(queue->iface.mem);
| 3524 | goto err_free_queue; |
| 3525 | } |
| 3526 | |
| 3527 | queue->profiling.slot_count = |
| 3528 | calc_profiling_ringbuf_num_slots(group->ptdev, args->ringbuf_size);
| 3529 | |
| 3530 | queue->profiling.slots = |
| 3531 | panthor_kernel_bo_create(group->ptdev, group->vm,
| 3532 | queue->profiling.slot_count *
| 3533 | sizeof(struct panthor_job_profiling_data),
| 3534 | DRM_PANTHOR_BO_NO_MMAP,
| 3535 | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
| 3536 | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
| 3537 | PANTHOR_VM_KERNEL_AUTO_VA,
| 3538 | "Group job stats");
| 3539 | |
| 3540 | if (IS_ERR(queue->profiling.slots)) {
| 3541 | ret = PTR_ERR(queue->profiling.slots);
| 3542 | goto err_free_queue; |
| 3543 | } |
| 3544 | |
| 3545 | ret = panthor_kernel_bo_vmap(queue->profiling.slots);
| 3546 | if (ret) |
| 3547 | goto err_free_queue; |
| 3548 | |
| 3549 | /* assign a unique name */ |
| 3550 | queue->name = kasprintf(GFP_KERNEL, "panthor-queue-%llu-%u-%u", drm_client_id, gid, qid);
| 3551 | if (!queue->name) { |
| 3552 | ret = -ENOMEM; |
| 3553 | goto err_free_queue; |
| 3554 | } |
| 3555 | |
| 3556 | sched_args.name = queue->name; |
| 3557 | |
| 3558 | ret = drm_sched_init(&queue->scheduler, &sched_args);
| 3559 | if (ret) |
| 3560 | goto err_free_queue; |
| 3561 | |
| 3562 | drm_sched = &queue->scheduler; |
| 3563 | ret = drm_sched_entity_init(&queue->entity, 0, &drm_sched, 1, NULL);
| 3564 | if (ret) |
| 3565 | goto err_free_queue; |
| 3566 | |
| 3567 | return queue; |
| 3568 | |
| 3569 | err_free_queue: |
| 3570 | group_free_queue(group, queue); |
| 3571 | return ERR_PTR(ret);
| 3572 | } |
| 3573 | |
| 3574 | static void group_init_task_info(struct panthor_group *group) |
| 3575 | { |
| 3576 | struct task_struct *task = current->group_leader; |
| 3577 | |
| 3578 | group->task_info.pid = task->pid; |
| 3579 | get_task_comm(group->task_info.comm, task); |
| 3580 | } |
| 3581 | |
| 3582 | static void add_group_kbo_sizes(struct panthor_device *ptdev, |
| 3583 | struct panthor_group *group) |
| 3584 | { |
| 3585 | struct panthor_queue *queue; |
| 3586 | int i; |
| 3587 | |
| 3588 | if (drm_WARN_ON(&ptdev->base, IS_ERR_OR_NULL(group))) |
| 3589 | return; |
| 3590 | if (drm_WARN_ON(&ptdev->base, ptdev != group->ptdev)) |
| 3591 | return; |
| 3592 | |
| 3593 | group->fdinfo.kbo_sizes += group->suspend_buf->obj->size; |
| 3594 | group->fdinfo.kbo_sizes += group->protm_suspend_buf->obj->size; |
| 3595 | group->fdinfo.kbo_sizes += group->syncobjs->obj->size; |
| 3596 | |
| 3597 | for (i = 0; i < group->queue_count; i++) { |
| 3598 | queue = group->queues[i]; |
| 3599 | group->fdinfo.kbo_sizes += queue->ringbuf->obj->size; |
| 3600 | group->fdinfo.kbo_sizes += queue->iface.mem->obj->size; |
| 3601 | group->fdinfo.kbo_sizes += queue->profiling.slots->obj->size; |
| 3602 | } |
| 3603 | } |
| 3604 | |
| 3605 | #define MAX_GROUPS_PER_POOL 128 |
| 3606 | |
| 3607 | int panthor_group_create(struct panthor_file *pfile, |
| 3608 | const struct drm_panthor_group_create *group_args, |
| 3609 | const struct drm_panthor_queue_create *queue_args, |
| 3610 | u64 drm_client_id) |
| 3611 | { |
| 3612 | struct panthor_device *ptdev = pfile->ptdev; |
| 3613 | struct panthor_group_pool *gpool = pfile->groups; |
| 3614 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 3615 | struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
| 3616 | struct panthor_group *group = NULL; |
| 3617 | u32 gid, i, suspend_size; |
| 3618 | int ret; |
| 3619 | |
| 3620 | if (group_args->pad) |
| 3621 | return -EINVAL; |
| 3622 | |
| 3623 | if (group_args->priority >= PANTHOR_CSG_PRIORITY_COUNT) |
| 3624 | return -EINVAL; |
| 3625 | |
| 3626 | if ((group_args->compute_core_mask & ~ptdev->gpu_info.shader_present) || |
| 3627 | (group_args->fragment_core_mask & ~ptdev->gpu_info.shader_present) || |
| 3628 | (group_args->tiler_core_mask & ~ptdev->gpu_info.tiler_present)) |
| 3629 | return -EINVAL; |
| 3630 | |
| 3631 | if (hweight64(group_args->compute_core_mask) < group_args->max_compute_cores || |
| 3632 | hweight64(group_args->fragment_core_mask) < group_args->max_fragment_cores || |
| 3633 | hweight64(group_args->tiler_core_mask) < group_args->max_tiler_cores) |
| 3634 | return -EINVAL; |
| 3635 | |
| 3636 | group = kzalloc(sizeof(*group), GFP_KERNEL); |
| 3637 | if (!group) |
| 3638 | return -ENOMEM; |
| 3639 | |
| 3640 | spin_lock_init(&group->fatal_lock); |
| 3641 | kref_init(&group->refcount);
| 3642 | group->state = PANTHOR_CS_GROUP_CREATED; |
| 3643 | group->csg_id = -1; |
| 3644 | |
| 3645 | group->ptdev = ptdev; |
| 3646 | group->max_compute_cores = group_args->max_compute_cores; |
| 3647 | group->compute_core_mask = group_args->compute_core_mask; |
| 3648 | group->max_fragment_cores = group_args->max_fragment_cores; |
| 3649 | group->fragment_core_mask = group_args->fragment_core_mask; |
| 3650 | group->max_tiler_cores = group_args->max_tiler_cores; |
| 3651 | group->tiler_core_mask = group_args->tiler_core_mask; |
| 3652 | group->priority = group_args->priority; |
| 3653 | |
| 3654 | INIT_LIST_HEAD(&group->wait_node);
| 3655 | INIT_LIST_HEAD(&group->run_node);
| 3656 | INIT_WORK(&group->term_work, group_term_work); |
| 3657 | INIT_WORK(&group->sync_upd_work, group_sync_upd_work); |
| 3658 | INIT_WORK(&group->tiler_oom_work, group_tiler_oom_work); |
| 3659 | INIT_WORK(&group->release_work, group_release_work); |
| 3660 | |
| 3661 | group->vm = panthor_vm_pool_get_vm(pfile->vms, group_args->vm_id);
| 3662 | if (!group->vm) { |
| 3663 | ret = -EINVAL; |
| 3664 | goto err_put_group; |
| 3665 | } |
| 3666 | |
| 3667 | suspend_size = csg_iface->control->suspend_size; |
| 3668 | group->suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
| 3669 | if (IS_ERR(group->suspend_buf)) {
| 3670 | ret = PTR_ERR(group->suspend_buf);
| 3671 | group->suspend_buf = NULL; |
| 3672 | goto err_put_group; |
| 3673 | } |
| 3674 | |
| 3675 | suspend_size = csg_iface->control->protm_suspend_size; |
| 3676 | group->protm_suspend_buf = panthor_fw_alloc_suspend_buf_mem(ptdev, suspend_size);
| 3677 | if (IS_ERR(group->protm_suspend_buf)) {
| 3678 | ret = PTR_ERR(group->protm_suspend_buf);
| 3679 | group->protm_suspend_buf = NULL; |
| 3680 | goto err_put_group; |
| 3681 | } |
| 3682 | |
| 3683 | group->syncobjs = panthor_kernel_bo_create(ptdev, group->vm,
| 3684 | group_args->queues.count *
| 3685 | sizeof(struct panthor_syncobj_64b),
| 3686 | DRM_PANTHOR_BO_NO_MMAP,
| 3687 | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC |
| 3688 | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED,
| 3689 | PANTHOR_VM_KERNEL_AUTO_VA,
| 3690 | "Group sync objects");
| 3691 | if (IS_ERR(group->syncobjs)) {
| 3692 | ret = PTR_ERR(group->syncobjs);
| 3693 | goto err_put_group; |
| 3694 | } |
| 3695 | |
| 3696 | ret = panthor_kernel_bo_vmap(group->syncobjs);
| 3697 | if (ret) |
| 3698 | goto err_put_group; |
| 3699 | |
| 3700 | memset(group->syncobjs->kmap, 0, |
| 3701 | group_args->queues.count * sizeof(struct panthor_syncobj_64b)); |
| 3702 | |
| 3703 | ret = xa_alloc(&gpool->xa, &gid, group, XA_LIMIT(1, MAX_GROUPS_PER_POOL), GFP_KERNEL);
| 3704 | if (ret) |
| 3705 | goto err_put_group; |
| 3706 | |
| 3707 | for (i = 0; i < group_args->queues.count; i++) { |
| 3708 | group->queues[i] = group_create_queue(group, &queue_args[i], drm_client_id, gid, i);
| 3709 | if (IS_ERR(group->queues[i])) {
| 3710 | ret = PTR_ERR(group->queues[i]);
| 3711 | group->queues[i] = NULL; |
| 3712 | goto err_erase_gid; |
| 3713 | } |
| 3714 | |
| 3715 | group->queue_count++; |
| 3716 | } |
| 3717 | |
| 3718 | group->idle_queues = GENMASK(group->queue_count - 1, 0); |
| 3719 | |
| 3720 | mutex_lock(&sched->reset.lock); |
| 3721 | if (atomic_read(&sched->reset.in_progress)) {
| 3722 | panthor_group_stop(group);
| 3723 | } else {
| 3724 | mutex_lock(&sched->lock);
| 3725 | list_add_tail(&group->run_node,
| 3726 | &sched->groups.idle[group->priority]);
| 3727 | mutex_unlock(&sched->lock);
| 3728 | }
| 3729 | mutex_unlock(&sched->reset.lock);
| 3730 | |
| 3731 | add_group_kbo_sizes(group->ptdev, group);
| 3732 | spin_lock_init(&group->fdinfo.lock); |
| 3733 | |
| 3734 | group_init_task_info(group); |
| 3735 | |
| 3736 | xa_set_mark(&gpool->xa, gid, GROUP_REGISTERED);
| 3737 | |
| 3738 | return gid; |
| 3739 | |
| 3740 | err_erase_gid: |
| 3741 | xa_erase(&gpool->xa, gid);
| 3742 | |
| 3743 | err_put_group: |
| 3744 | group_put(group); |
| 3745 | return ret; |
| 3746 | } |
| 3747 | |
| 3748 | int panthor_group_destroy(struct panthor_file *pfile, u32 group_handle) |
| 3749 | { |
| 3750 | struct panthor_group_pool *gpool = pfile->groups; |
| 3751 | struct panthor_device *ptdev = pfile->ptdev; |
| 3752 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 3753 | struct panthor_group *group; |
| 3754 | |
| 3755 | if (!xa_get_mark(&gpool->xa, group_handle, GROUP_REGISTERED))
| 3756 | return -EINVAL; |
| 3757 | |
| 3758 | group = xa_erase(&gpool->xa, group_handle);
| 3759 | if (!group) |
| 3760 | return -EINVAL; |
| 3761 | |
| 3762 | mutex_lock(&sched->reset.lock); |
| 3763 | mutex_lock(&sched->lock); |
| 3764 | group->destroyed = true; |
| 3765 | if (group->csg_id >= 0) { |
| 3766 | sched_queue_delayed_work(sched, tick, 0); |
| 3767 | } else if (!atomic_read(&sched->reset.in_progress)) {
| 3768 | /* Remove from the run queues, so the scheduler can't
| 3769 | * pick the group on the next tick.
| 3770 | */
| 3771 | list_del_init(&group->run_node);
| 3772 | list_del_init(&group->wait_node);
| 3773 | group_queue_work(group, term);
| 3774 | }
| 3775 | mutex_unlock(&sched->lock);
| 3776 | mutex_unlock(&sched->reset.lock);
| 3777 | |
| 3778 | group_put(group); |
| 3779 | return 0; |
| 3780 | } |
| 3781 | |
| 3782 | static struct panthor_group *group_from_handle(struct panthor_group_pool *pool, |
| 3783 | unsigned long group_handle) |
| 3784 | { |
| 3785 | struct panthor_group *group; |
| 3786 | |
| 3787 | xa_lock(&pool->xa); |
| 3788 | group = group_get(xa_find(&pool->xa, &group_handle, group_handle, GROUP_REGISTERED));
| 3789 | xa_unlock(&pool->xa); |
| 3790 | |
| 3791 | return group; |
| 3792 | } |
| 3793 | |
| 3794 | int panthor_group_get_state(struct panthor_file *pfile, |
| 3795 | struct drm_panthor_group_get_state *get_state) |
| 3796 | { |
| 3797 | struct panthor_group_pool *gpool = pfile->groups; |
| 3798 | struct panthor_device *ptdev = pfile->ptdev; |
| 3799 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 3800 | struct panthor_group *group; |
| 3801 | |
| 3802 | if (get_state->pad) |
| 3803 | return -EINVAL; |
| 3804 | |
| 3805 | group = group_from_handle(gpool, get_state->group_handle);
| 3806 | if (!group) |
| 3807 | return -EINVAL; |
| 3808 | |
| 3809 | memset(get_state, 0, sizeof(*get_state)); |
| 3810 | |
| 3811 | mutex_lock(&sched->lock); |
| 3812 | if (group->timedout) |
| 3813 | get_state->state |= DRM_PANTHOR_GROUP_STATE_TIMEDOUT; |
| 3814 | if (group->fatal_queues) { |
| 3815 | get_state->state |= DRM_PANTHOR_GROUP_STATE_FATAL_FAULT; |
| 3816 | get_state->fatal_queues = group->fatal_queues; |
| 3817 | } |
| 3818 | if (group->innocent) |
| 3819 | get_state->state |= DRM_PANTHOR_GROUP_STATE_INNOCENT; |
| 3820 | mutex_unlock(&sched->lock);
| 3821 | |
| 3822 | group_put(group); |
| 3823 | return 0; |
| 3824 | } |
| 3825 | |
| 3826 | int panthor_group_pool_create(struct panthor_file *pfile) |
| 3827 | { |
| 3828 | struct panthor_group_pool *gpool; |
| 3829 | |
| 3830 | gpool = kzalloc(sizeof(*gpool), GFP_KERNEL); |
| 3831 | if (!gpool) |
| 3832 | return -ENOMEM; |
| 3833 | |
| 3834 | xa_init_flags(&gpool->xa, XA_FLAGS_ALLOC1);
| 3835 | pfile->groups = gpool; |
| 3836 | return 0; |
| 3837 | } |
| 3838 | |
| 3839 | void panthor_group_pool_destroy(struct panthor_file *pfile) |
| 3840 | { |
| 3841 | struct panthor_group_pool *gpool = pfile->groups; |
| 3842 | struct panthor_group *group; |
| 3843 | unsigned long i; |
| 3844 | |
| 3845 | if (IS_ERR_OR_NULL(gpool))
| 3846 | return; |
| 3847 | |
| 3848 | xa_for_each(&gpool->xa, i, group) |
| 3849 | panthor_group_destroy(pfile, i);
| 3850 | |
| 3851 | xa_destroy(&gpool->xa); |
| 3852 | kfree(gpool);
| 3853 | pfile->groups = NULL; |
| 3854 | } |
| 3855 | |
| 3856 | /** |
| 3857 | * panthor_fdinfo_gather_group_mem_info() - Retrieve aggregate size of all private kernel BOs
| 3858 | * belonging to all the groups owned by an open Panthor file |
| 3859 | * @pfile: File. |
| 3860 | * @stats: Memory statistics to be updated. |
| 3861 | * |
| 3862 | */ |
| 3863 | void |
| 3864 | panthor_fdinfo_gather_group_mem_info(struct panthor_file *pfile, |
| 3865 | struct drm_memory_stats *stats) |
| 3866 | { |
| 3867 | struct panthor_group_pool *gpool = pfile->groups; |
| 3868 | struct panthor_group *group; |
| 3869 | unsigned long i; |
| 3870 | |
| 3871 | if (IS_ERR_OR_NULL(gpool))
| 3872 | return; |
| 3873 | |
| 3874 | xa_lock(&gpool->xa); |
| 3875 | xa_for_each_marked(&gpool->xa, i, group, GROUP_REGISTERED) { |
| 3876 | stats->resident += group->fdinfo.kbo_sizes; |
| 3877 | if (group->csg_id >= 0) |
| 3878 | stats->active += group->fdinfo.kbo_sizes; |
| 3879 | } |
| 3880 | xa_unlock(&gpool->xa); |
| 3881 | } |
| 3882 | |
| 3883 | static void job_release(struct kref *ref) |
| 3884 | { |
| 3885 | struct panthor_job *job = container_of(ref, struct panthor_job, refcount); |
| 3886 | |
| 3887 | drm_WARN_ON(&job->group->ptdev->base, !list_empty(&job->node)); |
| 3888 | |
| 3889 | if (job->base.s_fence) |
| 3890 | drm_sched_job_cleanup(&job->base);
| 3891 | |
| 3892 | if (job->done_fence && job->done_fence->ops)
| 3893 | dma_fence_put(job->done_fence);
| 3894 | else
| 3895 | dma_fence_free(job->done_fence);
| 3896 | |
| 3897 | group_put(job->group);
| 3898 | |
| 3899 | kfree(job);
| 3900 | } |
| 3901 | |
| 3902 | struct drm_sched_job *panthor_job_get(struct drm_sched_job *sched_job) |
| 3903 | { |
| 3904 | if (sched_job) { |
| 3905 | struct panthor_job *job = container_of(sched_job, struct panthor_job, base); |
| 3906 | |
| 3907 | kref_get(&job->refcount);
| 3908 | } |
| 3909 | |
| 3910 | return sched_job; |
| 3911 | } |
| 3912 | |
| 3913 | void panthor_job_put(struct drm_sched_job *sched_job) |
| 3914 | { |
| 3915 | struct panthor_job *job = container_of(sched_job, struct panthor_job, base); |
| 3916 | |
| 3917 | if (sched_job) |
| 3918 | kref_put(&job->refcount, job_release);
| 3919 | } |
| 3920 | |
| 3921 | struct panthor_vm *panthor_job_vm(struct drm_sched_job *sched_job) |
| 3922 | { |
| 3923 | struct panthor_job *job = container_of(sched_job, struct panthor_job, base); |
| 3924 | |
| 3925 | return job->group->vm; |
| 3926 | } |
| 3927 | |
| 3928 | struct drm_sched_job * |
| 3929 | panthor_job_create(struct panthor_file *pfile, |
| 3930 | u16 group_handle, |
| 3931 | const struct drm_panthor_queue_submit *qsubmit, |
| 3932 | u64 drm_client_id) |
| 3933 | { |
| 3934 | struct panthor_group_pool *gpool = pfile->groups; |
| 3935 | struct panthor_job *job; |
| 3936 | u32 credits; |
| 3937 | int ret; |
| 3938 | |
| 3939 | if (qsubmit->pad) |
| 3940 | return ERR_PTR(-EINVAL);
| 3941 | |
| 3942 | /* stream_addr and stream_size must either both be zero or both be non-zero. */
| 3943 | if ((qsubmit->stream_size == 0) != (qsubmit->stream_addr == 0)) |
| 3944 | return ERR_PTR(-EINVAL);
| 3945 | |
| 3946 | /* Make sure the address is aligned on 64-byte (cacheline) and the size is |
| 3947 | * aligned on 8-byte (instruction size). |
| 3948 | */ |
| 3949 | if ((qsubmit->stream_addr & 63) || (qsubmit->stream_size & 7)) |
| 3950 | return ERR_PTR(-EINVAL);
| 3951 | |
| 3952 | /* bits 24:30 must be zero. */ |
| 3953 | if (qsubmit->latest_flush & GENMASK(30, 24)) |
| 3954 | return ERR_PTR(-EINVAL);
| 3955 | |
| 3956 | job = kzalloc(sizeof(*job), GFP_KERNEL); |
| 3957 | if (!job) |
| 3958 | return ERR_PTR(-ENOMEM);
| 3959 | |
| 3960 | kref_init(&job->refcount);
| 3961 | job->queue_idx = qsubmit->queue_index; |
| 3962 | job->call_info.size = qsubmit->stream_size; |
| 3963 | job->call_info.start = qsubmit->stream_addr; |
| 3964 | job->call_info.latest_flush = qsubmit->latest_flush; |
| 3965 | INIT_LIST_HEAD(&job->node);
| 3966 | |
| 3967 | job->group = group_from_handle(gpool, group_handle);
| 3968 | if (!job->group) { |
| 3969 | ret = -EINVAL; |
| 3970 | goto err_put_job; |
| 3971 | } |
| 3972 | |
| 3973 | if (!group_can_run(job->group)) {
| 3974 | ret = -EINVAL; |
| 3975 | goto err_put_job; |
| 3976 | } |
| 3977 | |
| 3978 | if (job->queue_idx >= job->group->queue_count || |
| 3979 | !job->group->queues[job->queue_idx]) { |
| 3980 | ret = -EINVAL; |
| 3981 | goto err_put_job; |
| 3982 | } |
| 3983 | |
| 3984 | /* Empty command streams don't need a fence, they'll pick the one from |
| 3985 | * the previously submitted job. |
| 3986 | */ |
| 3987 | if (job->call_info.size) { |
| 3988 | job->done_fence = kzalloc(sizeof(*job->done_fence), GFP_KERNEL); |
| 3989 | if (!job->done_fence) { |
| 3990 | ret = -ENOMEM; |
| 3991 | goto err_put_job; |
| 3992 | } |
| 3993 | } |
| 3994 | |
| 3995 | job->profiling.mask = pfile->ptdev->profile_mask; |
| 3996 | credits = calc_job_credits(job->profiling.mask);
| 3997 | if (credits == 0) { |
| 3998 | ret = -EINVAL; |
| 3999 | goto err_put_job; |
| 4000 | } |
| 4001 | |
| 4002 | ret = drm_sched_job_init(&job->base,
| 4003 | &job->group->queues[job->queue_idx]->entity,
| 4004 | credits, job->group, drm_client_id);
| 4005 | if (ret) |
| 4006 | goto err_put_job; |
| 4007 | |
| 4008 | return &job->base; |
| 4009 | |
| 4010 | err_put_job: |
| 4011 | panthor_job_put(&job->base);
| 4012 | return ERR_PTR(ret);
| 4013 | } |
| 4014 | |
| 4015 | void panthor_job_update_resvs(struct drm_exec *exec, struct drm_sched_job *sched_job) |
| 4016 | { |
| 4017 | struct panthor_job *job = container_of(sched_job, struct panthor_job, base); |
| 4018 | |
| 4019 | panthor_vm_update_resvs(job->group->vm, exec, &sched_job->s_fence->finished,
| 4020 | DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
| 4021 | } |
| 4022 | |
| 4023 | void panthor_sched_unplug(struct panthor_device *ptdev) |
| 4024 | { |
| 4025 | struct panthor_scheduler *sched = ptdev->scheduler; |
| 4026 | |
| 4027 | disable_delayed_work_sync(&sched->tick_work);
| 4028 | disable_work_sync(&sched->fw_events_work);
| 4029 | disable_work_sync(&sched->sync_upd_work);
| 4030 | |
| 4031 | mutex_lock(&sched->lock); |
| 4032 | if (sched->pm.has_ref) { |
| 4033 | pm_runtime_put(ptdev->base.dev);
| 4034 | sched->pm.has_ref = false; |
| 4035 | } |
| 4036 | mutex_unlock(&sched->lock);
| 4037 | } |
| 4038 | |
| 4039 | static void panthor_sched_fini(struct drm_device *ddev, void *res) |
| 4040 | { |
| 4041 | struct panthor_scheduler *sched = res; |
| 4042 | int prio; |
| 4043 | |
| 4044 | if (!sched || !sched->csg_slot_count) |
| 4045 | return; |
| 4046 | |
| 4047 | if (sched->wq) |
| 4048 | destroy_workqueue(sched->wq);
| 4049 | |
| 4050 | if (sched->heap_alloc_wq) |
| 4051 | destroy_workqueue(sched->heap_alloc_wq);
| 4052 | |
| 4053 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { |
| 4054 | drm_WARN_ON(ddev, !list_empty(&sched->groups.runnable[prio])); |
| 4055 | drm_WARN_ON(ddev, !list_empty(&sched->groups.idle[prio])); |
| 4056 | } |
| 4057 | |
| 4058 | drm_WARN_ON(ddev, !list_empty(&sched->groups.waiting)); |
| 4059 | } |
| 4060 | |
| 4061 | int panthor_sched_init(struct panthor_device *ptdev) |
| 4062 | { |
| 4063 | struct panthor_fw_global_iface *glb_iface = panthor_fw_get_glb_iface(ptdev); |
| 4064 | struct panthor_fw_csg_iface *csg_iface = panthor_fw_get_csg_iface(ptdev, 0);
| 4065 | struct panthor_fw_cs_iface *cs_iface = panthor_fw_get_cs_iface(ptdev, 0, 0);
| 4066 | struct panthor_scheduler *sched; |
| 4067 | u32 gpu_as_count, num_groups; |
| 4068 | int prio, ret; |
| 4069 | |
| 4070 | sched = drmm_kzalloc(&ptdev->base, sizeof(*sched), GFP_KERNEL);
| 4071 | if (!sched) |
| 4072 | return -ENOMEM; |
| 4073 | |
| 4074 | /* The highest bit in JOB_INT_* is reserved for global IRQs. That
| 4075 | * leaves 31 bits for CSG IRQs, hence the MAX_CSGS clamp here. |
| 4076 | */ |
| 4077 | num_groups = min_t(u32, MAX_CSGS, glb_iface->control->group_num); |
| 4078 | |
| 4079 | /* The FW-side scheduler might deadlock if two groups with the same |
| 4080 | * priority try to access a set of resources that overlaps, with part |
| 4081 | * of the resources being allocated to one group and the other part to |
| 4082 | * the other group, both groups waiting for the remaining resources to |
| 4083 | * be allocated. To avoid that, it is recommended to assign each CSG a |
| 4084 | * different priority. In theory we could allow several groups to have |
| 4085 | * the same CSG priority if they don't request the same resources, but |
| 4086 | * that makes the scheduling logic more complicated, so let's clamp |
| 4087 | * the number of CSG slots to MAX_CSG_PRIO + 1 for now. |
| 4088 | */ |
| 4089 | num_groups = min_t(u32, MAX_CSG_PRIO + 1, num_groups); |
| 4090 | |
| 4091 | /* We need at least one AS for the MCU and one for the GPU contexts. */ |
| 4092 | gpu_as_count = hweight32(ptdev->gpu_info.as_present & GENMASK(31, 1)); |
| 4093 | if (!gpu_as_count) { |
| 4094 | drm_err(&ptdev->base, "Not enough AS (%d, expected at least 2)",
| 4095 | gpu_as_count + 1); |
| 4096 | return -EINVAL; |
| 4097 | } |
| 4098 | |
| 4099 | sched->ptdev = ptdev; |
| 4100 | sched->sb_slot_count = CS_FEATURES_SCOREBOARDS(cs_iface->control->features); |
| 4101 | sched->csg_slot_count = num_groups; |
| 4102 | sched->cs_slot_count = csg_iface->control->stream_num; |
| 4103 | sched->as_slot_count = gpu_as_count; |
| 4104 | ptdev->csif_info.csg_slot_count = sched->csg_slot_count; |
| 4105 | ptdev->csif_info.cs_slot_count = sched->cs_slot_count; |
| 4106 | ptdev->csif_info.scoreboard_slot_count = sched->sb_slot_count; |
| 4107 | |
| 4108 | sched->last_tick = 0; |
| 4109 | sched->resched_target = U64_MAX; |
| 4110 | sched->tick_period = msecs_to_jiffies(10);
| 4111 | INIT_DELAYED_WORK(&sched->tick_work, tick_work); |
| 4112 | INIT_WORK(&sched->sync_upd_work, sync_upd_work); |
| 4113 | INIT_WORK(&sched->fw_events_work, process_fw_events_work); |
| 4114 | |
| 4115 | ret = drmm_mutex_init(&ptdev->base, &sched->lock); |
| 4116 | if (ret) |
| 4117 | return ret; |
| 4118 | |
| 4119 | for (prio = PANTHOR_CSG_PRIORITY_COUNT - 1; prio >= 0; prio--) { |
| 4120 | INIT_LIST_HEAD(&sched->groups.runnable[prio]);
| 4121 | INIT_LIST_HEAD(&sched->groups.idle[prio]);
| 4122 | }
| 4123 | INIT_LIST_HEAD(&sched->groups.waiting);
| 4124 | |
| 4125 | ret = drmm_mutex_init(&ptdev->base, &sched->reset.lock); |
| 4126 | if (ret) |
| 4127 | return ret; |
| 4128 | |
| 4129 | INIT_LIST_HEAD(&sched->reset.stopped_groups);
| 4130 | |
| 4131 | /* sched->heap_alloc_wq will be used for heap chunk allocation on |
| 4132 | * tiler OOM events, which means we can't use the same workqueue for |
| 4133 | * the scheduler because works queued by the scheduler are in |
| 4134 | * the dma-signalling path. Allocate a dedicated heap_alloc_wq to |
| 4135 | * work around this limitation. |
| 4136 | * |
| 4137 | * FIXME: Ultimately, what we need is a failable/non-blocking GEM |
| 4138 | * allocation path that we can call when a heap OOM is reported. The |
| 4139 | * FW is smart enough to fall back on other methods if the kernel can't |
| 4140 | * allocate memory, and fail the tiling job if none of these |
| 4141 | * countermeasures worked. |
| 4142 | * |
| 4143 | * Set WQ_MEM_RECLAIM on sched->wq to unblock the situation when the |
| 4144 | * system is running out of memory. |
| 4145 | */ |
| 4146 | sched->heap_alloc_wq = alloc_workqueue("panthor-heap-alloc", WQ_UNBOUND, 0);
| 4147 | sched->wq = alloc_workqueue("panthor-csf-sched", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
| 4148 | if (!sched->wq || !sched->heap_alloc_wq) {
| 4149 | panthor_sched_fini(&ptdev->base, sched);
| 4150 | drm_err(&ptdev->base, "Failed to allocate the workqueues");
| 4151 | return -ENOMEM; |
| 4152 | } |
| 4153 | |
| 4154 | ret = drmm_add_action_or_reset(&ptdev->base, panthor_sched_fini, sched); |
| 4155 | if (ret) |
| 4156 | return ret; |
| 4157 | |
| 4158 | ptdev->scheduler = sched; |
| 4159 | return 0; |
| 4160 | } |
| 4161 | |