// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2019 Linaro, Ltd, Rob Herring <robh@kernel.org> */
/* Copyright 2023 Collabora ltd. */

#include <drm/drm_debugfs.h>
#include <drm/drm_drv.h>
#include <drm/drm_exec.h>
#include <drm/drm_gpuvm.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/panthor_drm.h>

#include <linux/atomic.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/io-pgtable.h>
#include <linux/iommu.h>
#include <linux/kmemleak.h>
#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/rwsem.h>
#include <linux/sched.h>
#include <linux/shmem_fs.h>
#include <linux/sizes.h>

#include "panthor_device.h"
#include "panthor_gem.h"
#include "panthor_gpu.h"
#include "panthor_heap.h"
#include "panthor_mmu.h"
#include "panthor_regs.h"
#include "panthor_sched.h"

#define MAX_AS_SLOTS 32

struct panthor_vm;

/**
 * struct panthor_as_slot - Address space slot
 */
struct panthor_as_slot {
	/** @vm: VM bound to this slot. NULL if no VM is bound. */
	struct panthor_vm *vm;
};

/**
 * struct panthor_mmu - MMU related data
 */
struct panthor_mmu {
	/** @irq: The MMU irq. */
	struct panthor_irq irq;

	/**
	 * @as: Address space related fields.
	 *
	 * The GPU has a limited number of address space (AS) slots, forcing
	 * us to re-assign them on-demand.
	 */
	struct {
		/** @as.slots_lock: Lock protecting access to all other AS fields. */
		struct mutex slots_lock;

		/** @as.alloc_mask: Bitmask encoding the allocated slots. */
		unsigned long alloc_mask;

		/** @as.faulty_mask: Bitmask encoding the faulty slots. */
		unsigned long faulty_mask;

		/** @as.slots: VMs currently bound to the AS slots. */
		struct panthor_as_slot slots[MAX_AS_SLOTS];

		/**
		 * @as.lru_list: List of least recently used VMs.
		 *
		 * We use this list to pick a VM to evict when all slots are
		 * used.
		 *
		 * There should be no more active VMs than there are AS slots,
		 * so this LRU is just here to keep VMs bound until there's
		 * a need to release a slot, thus avoiding unnecessary TLB/cache
		 * flushes.
		 */
		struct list_head lru_list;
	} as;

	/** @vm: VMs management fields */
	struct {
		/** @vm.lock: Lock protecting access to list. */
		struct mutex lock;

		/** @vm.list: List containing all VMs. */
		struct list_head list;

		/** @vm.reset_in_progress: True if a reset is in progress. */
		bool reset_in_progress;

		/** @vm.wq: Workqueue used for the VM_BIND queues. */
		struct workqueue_struct *wq;
	} vm;
};

/**
 * struct panthor_vm_pool - VM pool object
 */
struct panthor_vm_pool {
	/** @xa: Array used for VM handle tracking. */
	struct xarray xa;
};

/**
 * struct panthor_vma - GPU mapping object
 *
 * This is used to track GEM mappings in GPU space.
 */
struct panthor_vma {
	/** @base: Inherits from drm_gpuva. */
	struct drm_gpuva base;

	/** @node: Used to implement deferred release of VMAs. */
	struct list_head node;

	/**
	 * @flags: Combination of drm_panthor_vm_bind_op_flags.
	 *
	 * Only map related flags are accepted.
	 */
	u32 flags;
};

/**
 * struct panthor_vm_op_ctx - VM operation context
 *
 * With VM operations potentially taking place in a dma-signaling path, we
 * need to make sure everything that might require resource allocation is
 * pre-allocated upfront. This is what this operation context is for.
 *
 * We also collect resources that have been freed, so we can release them
 * asynchronously, and let the VM_BIND scheduler process the next VM_BIND
 * request.
 */
struct panthor_vm_op_ctx {
	/** @rsvd_page_tables: Pages reserved for the MMU page table update. */
	struct {
		/** @rsvd_page_tables.count: Number of pages reserved. */
		u32 count;

		/** @rsvd_page_tables.ptr: Points to the first unused page in the @pages table. */
		u32 ptr;

		/**
		 * @rsvd_page_tables.pages: Array of pages to be used for an MMU page table update.
		 *
		 * After a VM operation, there might be free pages left in this array.
		 * They should be returned to the pt_cache as part of the op_ctx cleanup.
		 */
		void **pages;
	} rsvd_page_tables;

	/**
	 * @preallocated_vmas: Pre-allocated VMAs to handle the remap case.
	 *
	 * Partial unmap requests or map requests overlapping existing mappings will
	 * trigger a remap call, which needs to register up to three panthor_vma objects
	 * (one for the new mapping, and two for the previous and next mappings).
	 */
	struct panthor_vma *preallocated_vmas[3];

	/** @flags: Combination of drm_panthor_vm_bind_op_flags. */
	u32 flags;

	/** @va: Virtual range targeted by the VM operation. */
	struct {
		/** @va.addr: Start address. */
		u64 addr;

		/** @va.range: Range size. */
		u64 range;
	} va;

	/** @map: Fields specific to a map operation. */
	struct {
		/** @map.vm_bo: Buffer object to map. */
		struct drm_gpuvm_bo *vm_bo;

		/** @map.bo_offset: Offset in the buffer object. */
		u64 bo_offset;

		/**
		 * @map.sgt: sg-table pointing to pages backing the GEM object.
		 *
		 * This is gathered at job creation time, such that we don't have
		 * to allocate in ::run_job().
		 */
		struct sg_table *sgt;

		/**
		 * @map.new_vma: The new VMA object that will be inserted to the VA tree.
		 */
		struct panthor_vma *new_vma;
	} map;
};

/**
 * struct panthor_vm - VM object
 *
 * A VM is an object representing a GPU (or MCU) virtual address space.
 * It embeds the MMU page table for this address space, a tree containing
 * all the virtual mappings of GEM objects, and other things needed to manage
 * the VM.
 *
 * Except for the MCU VM, which is managed by the kernel, all other VMs are
 * created by userspace and mostly managed by userspace, using the
 * %DRM_IOCTL_PANTHOR_VM_BIND ioctl.
 *
 * A portion of the virtual address space is reserved for kernel objects,
 * like heap chunks, and userspace gets to decide how much of the virtual
 * address space is left to the kernel (half of the virtual address space
 * by default).
 */
struct panthor_vm {
	/**
	 * @base: Inherit from drm_gpuvm.
	 *
	 * We delegate all the VA management to the common drm_gpuvm framework
	 * and only implement hooks to update the MMU page table.
	 */
	struct drm_gpuvm base;

	/**
	 * @sched: Scheduler used for asynchronous VM_BIND requests.
	 *
	 * We use a 1:1 scheduler here.
	 */
	struct drm_gpu_scheduler sched;

	/**
	 * @entity: Scheduling entity representing the VM_BIND queue.
	 *
	 * There's currently one bind queue per VM. It doesn't make sense to
	 * allow more given the VM operations are serialized anyway.
	 */
	struct drm_sched_entity entity;

	/** @ptdev: Device. */
	struct panthor_device *ptdev;

	/** @memattr: Value to program to the AS_MEMATTR register. */
	u64 memattr;

	/** @pgtbl_ops: Page table operations. */
	struct io_pgtable_ops *pgtbl_ops;

	/** @root_page_table: Stores the root page table pointer. */
	void *root_page_table;

	/**
	 * @op_lock: Lock used to serialize operations on a VM.
	 *
	 * The serialization of jobs queued to the VM_BIND queue is already
	 * taken care of by drm_sched, but we need to serialize synchronous
	 * and asynchronous VM_BIND requests. This is what this lock is for.
	 */
	struct mutex op_lock;

	/**
	 * @op_ctx: The context attached to the currently executing VM operation.
	 *
	 * NULL when no operation is in progress.
	 */
	struct panthor_vm_op_ctx *op_ctx;

	/**
	 * @mm: Memory management object representing the auto-VA/kernel-VA.
	 *
	 * Used to auto-allocate VA space for kernel-managed objects (tiler
	 * heaps, ...).
	 *
	 * For the MCU VM, this is managing the VA range that's used to map
	 * all shared interfaces.
	 *
	 * For user VMs, the range is specified by userspace, and must not
	 * exceed half of the addressable VA space.
	 */
	struct drm_mm mm;

	/** @mm_lock: Lock protecting the @mm field. */
	struct mutex mm_lock;

	/** @kernel_auto_va: Automatic VA-range for kernel BOs. */
	struct {
		/** @kernel_auto_va.start: Start of the automatic VA-range for kernel BOs. */
		u64 start;

		/** @kernel_auto_va.end: End of the automatic VA-range for kernel BOs. */
		u64 end;
	} kernel_auto_va;

	/** @as: Address space related fields. */
	struct {
		/**
		 * @as.id: ID of the address space this VM is bound to.
		 *
		 * A value of -1 means the VM is inactive/not bound.
		 */
		int id;

		/** @as.active_cnt: Number of active users of this VM. */
		refcount_t active_cnt;

		/**
		 * @as.lru_node: Used to insert the VM in the panthor_mmu::as::lru_list.
		 *
		 * Active VMs should not be inserted in the LRU list.
		 */
		struct list_head lru_node;
	} as;

	/**
	 * @heaps: Tiler heap related fields.
	 */
	struct {
		/**
		 * @heaps.pool: The heap pool attached to this VM.
		 *
		 * Will stay NULL until someone creates a heap context on this VM.
		 */
		struct panthor_heap_pool *pool;

		/** @heaps.lock: Lock used to protect access to @pool. */
		struct mutex lock;
	} heaps;

	/** @node: Used to insert the VM in the panthor_mmu::vm::list. */
	struct list_head node;

	/** @for_mcu: True if this is the MCU VM. */
	bool for_mcu;

	/**
	 * @destroyed: True if the VM was destroyed.
	 *
	 * No further bind requests should be queued to a destroyed VM.
	 */
	bool destroyed;

	/**
	 * @unusable: True if the VM has turned unusable because something
	 * bad happened during an asynchronous request.
	 *
	 * We don't try to recover from such failures, because this implies
	 * informing userspace about the specific operation that failed, and
	 * hoping the userspace driver can replay things from there. This all
	 * sounds very complicated for little gain.
	 *
	 * Instead, we should just flag the VM as unusable, and fail any
	 * further request targeting this VM.
	 *
	 * We also provide a way to query a VM state, so userspace can destroy
	 * it and create a new one.
	 *
	 * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST
	 * situation, where the logical device needs to be re-created.
	 */
	bool unusable;

	/**
	 * @unhandled_fault: Unhandled fault happened.
	 *
	 * This should be reported to the scheduler, and the queue/group be
	 * flagged as faulty as a result.
	 */
	bool unhandled_fault;
};

/**
 * struct panthor_vm_bind_job - VM bind job
 */
struct panthor_vm_bind_job {
	/** @base: Inherit from drm_sched_job. */
	struct drm_sched_job base;

	/** @refcount: Reference count. */
	struct kref refcount;

	/** @cleanup_op_ctx_work: Work used to cleanup the VM operation context. */
	struct work_struct cleanup_op_ctx_work;

	/** @vm: VM targeted by the VM operation. */
	struct panthor_vm *vm;

	/** @ctx: Operation context. */
	struct panthor_vm_op_ctx ctx;
};

/*
 * @pt_cache: Cache used to allocate MMU page tables.
 *
 * The pre-allocation pattern forces us to over-allocate to plan for
 * the worst case scenario, and return the pages we didn't use.
 *
 * Having a kmem_cache allows us to speed up allocations.
 */
static struct kmem_cache *pt_cache;

/**
 * alloc_pt() - Custom page table allocator
 * @cookie: Cookie passed at page table allocation time.
 * @size: Size of the page table. This size should be fixed,
 * and determined at creation time based on the granule size.
 * @gfp: GFP flags.
 *
 * We want a custom allocator so we can use a cache for page table
 * allocations and amortize the cost of the over-reservation that's
 * done to allow asynchronous VM operations.
 *
 * Return: non-NULL on success, NULL if the allocation failed for any
 * reason.
 */
static void *alloc_pt(void *cookie, size_t size, gfp_t gfp)
{
	struct panthor_vm *vm = cookie;
	void *page;

	/* Allocation of the root page table happens during init. */
	if (unlikely(!vm->root_page_table)) {
		struct page *p;

		drm_WARN_ON(&vm->ptdev->base, vm->op_ctx);
		p = alloc_pages_node(dev_to_node(vm->ptdev->base.dev),
				     gfp | __GFP_ZERO, get_order(size));
		page = p ? page_address(p) : NULL;
		vm->root_page_table = page;
		return page;
	}

	/* We're not supposed to have anything bigger than 4k here, because we picked a
	 * 4k granule size at init time.
	 */
	if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K))
		return NULL;

	/* We must have some op_ctx attached to the VM and it must have at least one
	 * free page.
	 */
	if (drm_WARN_ON(&vm->ptdev->base, !vm->op_ctx) ||
	    drm_WARN_ON(&vm->ptdev->base,
			vm->op_ctx->rsvd_page_tables.ptr >= vm->op_ctx->rsvd_page_tables.count))
		return NULL;

	page = vm->op_ctx->rsvd_page_tables.pages[vm->op_ctx->rsvd_page_tables.ptr++];
	memset(page, 0, SZ_4K);

	/* Page table entries don't use virtual addresses, which trips out
	 * kmemleak. kmemleak_alloc_phys() might work, but physical addresses
	 * are mixed with other fields, and I fear kmemleak won't detect that
	 * either.
	 *
	 * Let's just ignore memory passed to the page-table driver for now.
	 */
	kmemleak_ignore(page);
	return page;
}

/**
 * free_pt() - Custom page table free function
 * @cookie: Cookie passed at page table allocation time.
 * @data: Page table to free.
 * @size: Size of the page table. This size should be fixed,
 * and determined at creation time based on the granule size.
 */
static void free_pt(void *cookie, void *data, size_t size)
{
	struct panthor_vm *vm = cookie;

	if (unlikely(vm->root_page_table == data)) {
		free_pages((unsigned long)data, get_order(size));
		vm->root_page_table = NULL;
		return;
	}

	if (drm_WARN_ON(&vm->ptdev->base, size != SZ_4K))
		return;

	/* Return the page to the pt_cache. */
	kmem_cache_free(pt_cache, data);
}

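/* Poll AS_STATUS until the AS has no command in flight, scheduling a GPU
 * reset if the AS_ACTIVE bit never clears.
 */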
static int wait_ready(struct panthor_device *ptdev, u32 as_nr)
{
	int ret;
	u32 val;

	/* Wait for the MMU status to indicate there is no active command, in
	 * case one is pending.
	 */
	ret = gpu_read_relaxed_poll_timeout_atomic(ptdev, AS_STATUS(as_nr), val,
						   !(val & AS_STATUS_AS_ACTIVE),
						   10, 100000);

	if (ret) {
		panthor_device_schedule_reset(ptdev);
		drm_err(&ptdev->base, "AS_ACTIVE bit stuck\n");
	}

	return ret;
}

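/* Issue a new AS_COMMAND once the AS is ready to accept it. */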
static int write_cmd(struct panthor_device *ptdev, u32 as_nr, u32 cmd)
{
	int status;

	/* write AS_COMMAND when MMU is ready to accept another command */
	status = wait_ready(ptdev, as_nr);
	if (!status)
		gpu_write(ptdev, AS_COMMAND(as_nr), cmd);

	return status;
}

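/* Lock the smallest naturally-aligned power-of-two region covering
 * [region_start, region_start + size) so it can be safely updated.
 */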
static void lock_region(struct panthor_device *ptdev, u32 as_nr,
			u64 region_start, u64 size)
{
	u8 region_width;
	u64 region;
	u64 region_end = region_start + size;

	if (!size)
		return;

	/*
	 * The locked region is a naturally aligned power of 2 block encoded as
	 * log2 minus 1.
	 * Calculate the desired start/end and look for the highest bit which
	 * differs. The smallest naturally aligned block must include this bit
	 * change, the desired region starts with this bit (and subsequent bits)
	 * zeroed and ends with the bit (and subsequent bits) set to one.
	 */
	region_width = max(fls64(region_start ^ (region_end - 1)),
			   const_ilog2(AS_LOCK_REGION_MIN_SIZE)) - 1;

	/*
	 * Mask off the low bits of region_start (which would be ignored by
	 * the hardware anyway)
	 */
	region_start &= GENMASK_ULL(63, region_width);

	region = region_width | region_start;

	/* Lock the region that needs to be updated */
	gpu_write64(ptdev, AS_LOCKADDR(as_nr), region);
	write_cmd(ptdev, as_nr, AS_COMMAND_LOCK);
}

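/* Lock the region, flush the L2/LSC caches through the GPU_CONTROL interface,
 * then explicitly unlock. Caller must hold the AS slots lock.
 */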
static int mmu_hw_do_operation_locked(struct panthor_device *ptdev, int as_nr,
				      u64 iova, u64 size, u32 op)
{
	const u32 l2_flush_op = CACHE_CLEAN | CACHE_INV;
	u32 lsc_flush_op;
	int ret;

	lockdep_assert_held(&ptdev->mmu->as.slots_lock);

	switch (op) {
	case AS_COMMAND_FLUSH_MEM:
		lsc_flush_op = CACHE_CLEAN | CACHE_INV;
		break;
	case AS_COMMAND_FLUSH_PT:
		lsc_flush_op = 0;
		break;
	default:
		drm_WARN(&ptdev->base, 1, "Unexpected AS_COMMAND: %d", op);
		return -EINVAL;
	}

	if (as_nr < 0)
		return 0;

	/*
	 * If the AS number is valid (>= 0), then we can be sure
	 * the device is up and running, so we don't need to explicitly
	 * power it up
	 */

	lock_region(ptdev, as_nr, iova, size);

	ret = wait_ready(ptdev, as_nr);
	if (ret)
		return ret;

	ret = panthor_gpu_flush_caches(ptdev, l2_flush_op, lsc_flush_op, 0);
	if (ret)
		return ret;

	/*
	 * Explicitly unlock the region as the AS is not unlocked automatically
	 * at the end of the GPU_CONTROL cache flush command, unlike
	 * AS_COMMAND_FLUSH_MEM or AS_COMMAND_FLUSH_PT.
	 */
	write_cmd(ptdev, as_nr, AS_COMMAND_UNLOCK);

	/* Wait for the unlock command to complete */
	return wait_ready(ptdev, as_nr);
}

static int mmu_hw_do_operation(struct panthor_vm *vm,
			       u64 iova, u64 size, u32 op)
{
	struct panthor_device *ptdev = vm->ptdev;
	int ret;

	mutex_lock(&ptdev->mmu->as.slots_lock);
	ret = mmu_hw_do_operation_locked(ptdev, vm->as.id, iova, size, op);
	mutex_unlock(&ptdev->mmu->as.slots_lock);

	return ret;
}

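/* Flush the AS, program its translation table, memory attributes and
 * translation config, then issue an UPDATE command to apply them.
 */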
static int panthor_mmu_as_enable(struct panthor_device *ptdev, u32 as_nr,
				 u64 transtab, u64 transcfg, u64 memattr)
{
	int ret;

	ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
	if (ret)
		return ret;

	gpu_write64(ptdev, AS_TRANSTAB(as_nr), transtab);
	gpu_write64(ptdev, AS_MEMATTR(as_nr), memattr);
	gpu_write64(ptdev, AS_TRANSCFG(as_nr), transcfg);

	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
}

static int panthor_mmu_as_disable(struct panthor_device *ptdev, u32 as_nr)
{
	int ret;

	ret = mmu_hw_do_operation_locked(ptdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
	if (ret)
		return ret;

	gpu_write64(ptdev, AS_TRANSTAB(as_nr), 0);
	gpu_write64(ptdev, AS_MEMATTR(as_nr), 0);
	gpu_write64(ptdev, AS_TRANSCFG(as_nr), AS_TRANSCFG_ADRMODE_UNMAPPED);

	return write_cmd(ptdev, as_nr, AS_COMMAND_UPDATE);
}

static u32 panthor_mmu_fault_mask(struct panthor_device *ptdev, u32 value)
{
	/* Bits 16 to 31 mean REQ_COMPLETE. */
	return value & GENMASK(15, 0);
}

static u32 panthor_mmu_as_fault_mask(struct panthor_device *ptdev, u32 as)
{
	return BIT(as);
}

/**
 * panthor_vm_has_unhandled_faults() - Check if a VM has unhandled faults
 * @vm: VM to check.
 *
 * Return: true if the VM has unhandled faults, false otherwise.
 */
bool panthor_vm_has_unhandled_faults(struct panthor_vm *vm)
{
	return vm->unhandled_fault;
}

/**
 * panthor_vm_is_unusable() - Check if the VM is still usable
 * @vm: VM to check.
 *
 * Return: true if the VM is unusable, false otherwise.
 */
bool panthor_vm_is_unusable(struct panthor_vm *vm)
{
	return vm->unusable;
}

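/* Detach a VM from its AS slot so the slot can be re-assigned.
 * Caller must hold the AS slots lock.
 */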
static void panthor_vm_release_as_locked(struct panthor_vm *vm)
{
	struct panthor_device *ptdev = vm->ptdev;

	lockdep_assert_held(&ptdev->mmu->as.slots_lock);

	if (drm_WARN_ON(&ptdev->base, vm->as.id < 0))
		return;

	ptdev->mmu->as.slots[vm->as.id].vm = NULL;
	clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask);
	refcount_set(&vm->as.active_cnt, 0);
	list_del_init(&vm->as.lru_node);
	vm->as.id = -1;
}

/**
 * panthor_vm_active() - Flag a VM as active
 * @vm: VM to flag as active.
 *
 * Assigns an address space to a VM so it can be used by the GPU/MCU.
 *
 * Return: 0 on success, a negative error code otherwise.
 */
int panthor_vm_active(struct panthor_vm *vm)
{
	struct panthor_device *ptdev = vm->ptdev;
	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
	struct io_pgtable_cfg *cfg = &io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg;
	int ret = 0, as, cookie;
	u64 transtab, transcfg;

	if (!drm_dev_enter(&ptdev->base, &cookie))
		return -ENODEV;

	if (refcount_inc_not_zero(&vm->as.active_cnt))
		goto out_dev_exit;

	mutex_lock(&ptdev->mmu->as.slots_lock);

	if (refcount_inc_not_zero(&vm->as.active_cnt))
		goto out_unlock;

	as = vm->as.id;
	if (as >= 0) {
		/* Unhandled pagefault on this AS, the MMU was disabled. We need to
		 * re-enable the MMU after clearing+unmasking the AS interrupts.
		 */
		if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as))
			goto out_enable_as;

		goto out_make_active;
	}

	/* Check for a free AS */
	if (vm->for_mcu) {
		drm_WARN_ON(&ptdev->base, ptdev->mmu->as.alloc_mask & BIT(0));
		as = 0;
	} else {
		as = ffz(ptdev->mmu->as.alloc_mask | BIT(0));
	}

	if (!(BIT(as) & ptdev->gpu_info.as_present)) {
		struct panthor_vm *lru_vm;

		lru_vm = list_first_entry_or_null(&ptdev->mmu->as.lru_list,
						  struct panthor_vm,
						  as.lru_node);
		if (drm_WARN_ON(&ptdev->base, !lru_vm)) {
			ret = -EBUSY;
			goto out_unlock;
		}

		drm_WARN_ON(&ptdev->base, refcount_read(&lru_vm->as.active_cnt));
		as = lru_vm->as.id;
		panthor_vm_release_as_locked(lru_vm);
	}

	/* Assign the free or reclaimed AS to the FD */
	vm->as.id = as;
	set_bit(as, &ptdev->mmu->as.alloc_mask);
	ptdev->mmu->as.slots[as].vm = vm;

out_enable_as:
	transtab = cfg->arm_lpae_s1_cfg.ttbr;
	transcfg = AS_TRANSCFG_PTW_MEMATTR_WB |
		   AS_TRANSCFG_PTW_RA |
		   AS_TRANSCFG_ADRMODE_AARCH64_4K |
		   AS_TRANSCFG_INA_BITS(55 - va_bits);
	if (ptdev->coherent)
		transcfg |= AS_TRANSCFG_PTW_SH_OS;

	/* If the VM is re-activated, we clear the fault. */
	vm->unhandled_fault = false;

	/* Unhandled pagefault on this AS, clear the fault and re-enable interrupts
	 * before enabling the AS.
	 */
	if (ptdev->mmu->as.faulty_mask & panthor_mmu_as_fault_mask(ptdev, as)) {
		gpu_write(ptdev, MMU_INT_CLEAR, panthor_mmu_as_fault_mask(ptdev, as));
		ptdev->mmu->as.faulty_mask &= ~panthor_mmu_as_fault_mask(ptdev, as);
		ptdev->mmu->irq.mask |= panthor_mmu_as_fault_mask(ptdev, as);
		gpu_write(ptdev, MMU_INT_MASK, ~ptdev->mmu->as.faulty_mask);
	}

	ret = panthor_mmu_as_enable(vm->ptdev, vm->as.id, transtab, transcfg, vm->memattr);

out_make_active:
	if (!ret) {
		refcount_set(&vm->as.active_cnt, 1);
		list_del_init(&vm->as.lru_node);
	}

out_unlock:
	mutex_unlock(&ptdev->mmu->as.slots_lock);

out_dev_exit:
	drm_dev_exit(cookie);
	return ret;
}

/**
 * panthor_vm_idle() - Flag a VM idle
 * @vm: VM to flag as idle.
 *
 * When we know the GPU is done with the VM (no more jobs to process),
 * we can relinquish the AS slot attached to this VM, if any.
 *
 * We don't release the slot immediately, but instead place the VM in
 * the LRU list, so it can be evicted if another VM needs an AS slot.
 * This way, VMs stay attached to the AS they were given until we run
 * out of free slots, limiting the number of MMU operations (TLB flush
 * and other AS updates).
 */
void panthor_vm_idle(struct panthor_vm *vm)
{
	struct panthor_device *ptdev = vm->ptdev;

	if (!refcount_dec_and_mutex_lock(&vm->as.active_cnt, &ptdev->mmu->as.slots_lock))
		return;

	if (!drm_WARN_ON(&ptdev->base, vm->as.id == -1 || !list_empty(&vm->as.lru_node)))
		list_add_tail(&vm->as.lru_node, &ptdev->mmu->as.lru_list);

	refcount_set(&vm->as.active_cnt, 0);
	mutex_unlock(&ptdev->mmu->as.slots_lock);
}

u32 panthor_vm_page_size(struct panthor_vm *vm)
{
	const struct io_pgtable *pgt = io_pgtable_ops_to_pgtable(vm->pgtbl_ops);
	u32 pg_shift = ffs(pgt->cfg.pgsize_bitmap) - 1;

	return 1u << pg_shift;
}

static void panthor_vm_stop(struct panthor_vm *vm)
{
	drm_sched_stop(&vm->sched, NULL);
}

static void panthor_vm_start(struct panthor_vm *vm)
{
	drm_sched_start(&vm->sched, 0);
}

/**
 * panthor_vm_as() - Get the AS slot attached to a VM
 * @vm: VM to get the AS slot of.
 *
 * Return: -1 if the VM is not assigned an AS slot yet, >= 0 otherwise.
 */
int panthor_vm_as(struct panthor_vm *vm)
{
	return vm->as.id;
}

static size_t get_pgsize(u64 addr, size_t size, size_t *count)
{
	/*
	 * io-pgtable only operates on multiple pages within a single table
	 * entry, so we need to split at boundaries of the table size, i.e.
	 * the next block size up. The distance from address A to the next
	 * boundary of block size B is logically B - A % B, but in unsigned
	 * two's complement where B is a power of two we get the equivalence
	 * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
	 */
	size_t blk_offset = -addr % SZ_2M;

	if (blk_offset || size < SZ_2M) {
		*count = min_not_zero(blk_offset, size) / SZ_4K;
		return SZ_4K;
	}
	blk_offset = -addr % SZ_1G ?: SZ_1G;
	*count = min(blk_offset, size) / SZ_2M;
	return SZ_2M;
}

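/* Flush the page tables for a VA range. The flush is silently skipped if the
 * VM has no AS slot or the device is unplugged.
 */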
static int panthor_vm_flush_range(struct panthor_vm *vm, u64 iova, u64 size)
{
	struct panthor_device *ptdev = vm->ptdev;
	int ret = 0, cookie;

	if (vm->as.id < 0)
		return 0;

	/* If the device is unplugged, we just silently skip the flush. */
	if (!drm_dev_enter(&ptdev->base, &cookie))
		return 0;

	ret = mmu_hw_do_operation(vm, iova, size, AS_COMMAND_FLUSH_PT);

	drm_dev_exit(cookie);
	return ret;
}

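/* Tear down the page table entries covering [iova, iova + size), then flush
 * the affected range.
 */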
static int panthor_vm_unmap_pages(struct panthor_vm *vm, u64 iova, u64 size)
{
	struct panthor_device *ptdev = vm->ptdev;
	struct io_pgtable_ops *ops = vm->pgtbl_ops;
	u64 start_iova = iova;
	u64 offset = 0;

	while (offset < size) {
		size_t unmapped_sz = 0, pgcount;
		size_t pgsize = get_pgsize(iova + offset, size - offset, &pgcount);

		unmapped_sz = ops->unmap_pages(ops, iova + offset, pgsize, pgcount, NULL);

		if (drm_WARN_ON(&ptdev->base, unmapped_sz != pgsize * pgcount)) {
			drm_err(&ptdev->base, "failed to unmap range %llx-%llx (requested range %llx-%llx)\n",
				iova + offset + unmapped_sz,
				iova + offset + pgsize * pgcount,
				iova, iova + size);
			panthor_vm_flush_range(vm, iova, offset + unmapped_sz);
			return -EINVAL;
		}

		drm_dbg(&ptdev->base,
			"unmap: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pgcnt=%zu, pgsz=%zu",
			vm->as.id, start_iova, size, iova + offset,
			unmapped_sz / pgsize, pgsize);

		offset += unmapped_sz;
	}

	return panthor_vm_flush_range(vm, iova, size);
}

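/* Walk the sg-table and map [offset, offset + size) at iova. If anything
 * fails, everything that was mapped so far is unmapped before returning.
 */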
static int
panthor_vm_map_pages(struct panthor_vm *vm, u64 iova, int prot,
		     struct sg_table *sgt, u64 offset, u64 size)
{
	struct panthor_device *ptdev = vm->ptdev;
	unsigned int count;
	struct scatterlist *sgl;
	struct io_pgtable_ops *ops = vm->pgtbl_ops;
	u64 start_iova = iova;
	u64 start_size = size;
	int ret;

	if (!size)
		return 0;

	for_each_sgtable_dma_sg(sgt, sgl, count) {
		dma_addr_t paddr = sg_dma_address(sgl);
		size_t len = sg_dma_len(sgl);

		if (len <= offset) {
			offset -= len;
			continue;
		}

		paddr += offset;
		len -= offset;
		len = min_t(size_t, len, size);
		size -= len;

		while (len) {
			size_t pgcount, mapped = 0;
			size_t pgsize = get_pgsize(iova | paddr, len, &pgcount);

			ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot,
					     GFP_KERNEL, &mapped);

			drm_dbg(&ptdev->base,
				"map: as=%d, iova=0x%llx, sz=%llu, va=0x%llx, pa=%pad, pgcnt=%zu, pgsz=%zu",
				vm->as.id, start_iova, start_size, iova, &paddr,
				mapped / pgsize, pgsize);

			iova += mapped;
			paddr += mapped;
			len -= mapped;

			if (drm_WARN_ON(&ptdev->base, !ret && !mapped))
				ret = -ENOMEM;

			if (ret) {
				/* If something failed, unmap what we've already mapped before
				 * returning. The unmap call is not supposed to fail.
				 */
				drm_WARN_ON(&ptdev->base,
					    panthor_vm_unmap_pages(vm, start_iova,
								   iova - start_iova));
				return ret;
			}
		}

		if (!size)
			break;

		offset = 0;
	}

	return panthor_vm_flush_range(vm, start_iova, iova - start_iova);
}

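/* Translate DRM_PANTHOR_VM_BIND_OP_MAP_* flags into IOMMU_* prot bits. */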
static int flags_to_prot(u32 flags)
{
	int prot = 0;

	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC)
		prot |= IOMMU_NOEXEC;

	if (!(flags & DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED))
		prot |= IOMMU_CACHE;

	if (flags & DRM_PANTHOR_VM_BIND_OP_MAP_READONLY)
		prot |= IOMMU_READ;
	else
		prot |= IOMMU_READ | IOMMU_WRITE;

	return prot;
}

/**
 * panthor_vm_alloc_va() - Allocate a region in the auto-va space
 * @vm: VM to allocate a region on.
 * @va: start of the VA range. Can be PANTHOR_VM_KERNEL_AUTO_VA if the user
 * wants the VA to be automatically allocated from the auto-VA range.
 * @size: size of the VA range.
 * @va_node: drm_mm_node to initialize. Must be zero-initialized.
 *
 * Some GPU objects, like heap chunks, are fully managed by the kernel and
 * need to be mapped to the userspace VM, in the region reserved for kernel
 * objects.
 *
 * This function takes care of allocating a region in the kernel auto-VA space.
 *
 * Return: 0 on success, an error code otherwise.
 */
int
panthor_vm_alloc_va(struct panthor_vm *vm, u64 va, u64 size,
		    struct drm_mm_node *va_node)
{
	ssize_t vm_pgsz = panthor_vm_page_size(vm);
	int ret;

	if (!size || !IS_ALIGNED(size, vm_pgsz))
		return -EINVAL;

	if (va != PANTHOR_VM_KERNEL_AUTO_VA && !IS_ALIGNED(va, vm_pgsz))
		return -EINVAL;

	mutex_lock(&vm->mm_lock);
	if (va != PANTHOR_VM_KERNEL_AUTO_VA) {
		va_node->start = va;
		va_node->size = size;
		ret = drm_mm_reserve_node(&vm->mm, va_node);
	} else {
		ret = drm_mm_insert_node_in_range(&vm->mm, va_node, size,
						  size >= SZ_2M ? SZ_2M : SZ_4K,
						  0, vm->kernel_auto_va.start,
						  vm->kernel_auto_va.end,
						  DRM_MM_INSERT_BEST);
	}
	mutex_unlock(&vm->mm_lock);

	return ret;
}

/**
 * panthor_vm_free_va() - Free a region allocated with panthor_vm_alloc_va()
 * @vm: VM to free the region on.
 * @va_node: Memory node representing the region to free.
 */
void panthor_vm_free_va(struct panthor_vm *vm, struct drm_mm_node *va_node)
{
	mutex_lock(&vm->mm_lock);
	drm_mm_remove_node(va_node);
	mutex_unlock(&vm->mm_lock);
}

static void panthor_vm_bo_free(struct drm_gpuvm_bo *vm_bo)
{
	struct panthor_gem_object *bo = to_panthor_bo(vm_bo->obj);

	if (!drm_gem_is_imported(&bo->base.base))
		drm_gem_shmem_unpin(&bo->base);
	kfree(vm_bo);
}

static void panthor_vm_cleanup_op_ctx(struct panthor_vm_op_ctx *op_ctx,
				      struct panthor_vm *vm)
{
	u32 remaining_pt_count = op_ctx->rsvd_page_tables.count -
				 op_ctx->rsvd_page_tables.ptr;

	if (remaining_pt_count) {
		kmem_cache_free_bulk(pt_cache, remaining_pt_count,
				     op_ctx->rsvd_page_tables.pages +
				     op_ctx->rsvd_page_tables.ptr);
	}

	kfree(op_ctx->rsvd_page_tables.pages);

	if (op_ctx->map.vm_bo)
		drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);

	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++)
		kfree(op_ctx->preallocated_vmas[i]);

	drm_gpuvm_bo_deferred_cleanup(&vm->base);
}

static void
panthor_vm_op_ctx_return_vma(struct panthor_vm_op_ctx *op_ctx,
			     struct panthor_vma *vma)
{
	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
		if (!op_ctx->preallocated_vmas[i]) {
			op_ctx->preallocated_vmas[i] = vma;
			return;
		}
	}

	WARN_ON_ONCE(1);
}

static struct panthor_vma *
panthor_vm_op_ctx_get_vma(struct panthor_vm_op_ctx *op_ctx)
{
	for (u32 i = 0; i < ARRAY_SIZE(op_ctx->preallocated_vmas); i++) {
		struct panthor_vma *vma = op_ctx->preallocated_vmas[i];

		if (vma) {
			op_ctx->preallocated_vmas[i] = NULL;
			return vma;
		}
	}

	return NULL;
}

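/* Pre-allocate the VMAs a map/unmap operation might need, so nothing has to
 * be allocated when the operation actually runs.
 */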
static int
panthor_vm_op_ctx_prealloc_vmas(struct panthor_vm_op_ctx *op_ctx)
{
	u32 vma_count;

	switch (op_ctx->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) {
	case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP:
		/* One VMA for the new mapping, and two more VMAs for the remap case
		 * which might contain both a prev and next VA.
		 */
		vma_count = 3;
		break;

	case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
		/* Two VMAs can be needed for an unmap, as an unmap can happen
		 * in the middle of a drm_gpuva, requiring a remap with both
		 * prev & next VA. Or an unmap can span more than one drm_gpuva
		 * where the first and last ones are covered partially, requiring
		 * a remap for the first with a prev VA and remap for the last
		 * with a next VA.
		 */
		vma_count = 2;
		break;

	default:
		return 0;
	}

	for (u32 i = 0; i < vma_count; i++) {
		struct panthor_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);

		if (!vma)
			return -ENOMEM;

		op_ctx->preallocated_vmas[i] = vma;
	}

	return 0;
}

#define PANTHOR_VM_BIND_OP_MAP_FLAGS \
	(DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \
	 DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \
	 DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED | \
	 DRM_PANTHOR_VM_BIND_OP_TYPE_MASK)

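/* Pre-allocate and pre-pin everything a map operation might need: VMAs, BO
 * pages, the GPUVM BO wrapper and enough page tables for the worst case.
 */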
static int panthor_vm_prepare_map_op_ctx(struct panthor_vm_op_ctx *op_ctx,
					 struct panthor_vm *vm,
					 struct panthor_gem_object *bo,
					 u64 offset,
					 u64 size, u64 va,
					 u32 flags)
{
	struct drm_gpuvm_bo *preallocated_vm_bo;
	struct sg_table *sgt = NULL;
	u64 pt_count;
	int ret;

	if (!bo)
		return -EINVAL;

	if ((flags & ~PANTHOR_VM_BIND_OP_MAP_FLAGS) ||
	    (flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) != DRM_PANTHOR_VM_BIND_OP_TYPE_MAP)
		return -EINVAL;

	/* Make sure the VA and size are in-bounds. */
	if (size > bo->base.base.size || offset > bo->base.base.size - size)
		return -EINVAL;

	/* If the BO has an exclusive VM attached, it can't be mapped to other VMs. */
	if (bo->exclusive_vm_root_gem &&
	    bo->exclusive_vm_root_gem != panthor_vm_root_gem(vm))
		return -EINVAL;

	memset(op_ctx, 0, sizeof(*op_ctx));
	op_ctx->flags = flags;
	op_ctx->va.range = size;
	op_ctx->va.addr = va;

	ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx);
	if (ret)
		goto err_cleanup;

	if (!drm_gem_is_imported(&bo->base.base)) {
		/* Pre-reserve the BO pages, so the map operation doesn't have to
		 * allocate. This pin is dropped in panthor_vm_bo_free(), so
		 * once we have successfully called drm_gpuvm_bo_create(),
		 * GPUVM will take care of dropping the pin for us.
		 */
		ret = drm_gem_shmem_pin(&bo->base);
		if (ret)
			goto err_cleanup;
	}

	sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
	if (IS_ERR(sgt)) {
		if (!drm_gem_is_imported(&bo->base.base))
			drm_gem_shmem_unpin(&bo->base);

		ret = PTR_ERR(sgt);
		goto err_cleanup;
	}

	op_ctx->map.sgt = sgt;

	preallocated_vm_bo = drm_gpuvm_bo_create(&vm->base, &bo->base.base);
	if (!preallocated_vm_bo) {
		if (!drm_gem_is_imported(&bo->base.base))
			drm_gem_shmem_unpin(&bo->base);

		ret = -ENOMEM;
		goto err_cleanup;
	}

	op_ctx->map.vm_bo = drm_gpuvm_bo_obtain_prealloc(preallocated_vm_bo);

	op_ctx->map.bo_offset = offset;

	/* L1, L2 and L3 page tables.
	 * We could optimize L3 allocation by iterating over the sgt and merging
	 * 2M contiguous blocks, but it's simpler to over-provision and return
	 * the pages if they're not used.
	 */
	pt_count = ((ALIGN(va + size, 1ull << 39) - ALIGN_DOWN(va, 1ull << 39)) >> 39) +
		   ((ALIGN(va + size, 1ull << 30) - ALIGN_DOWN(va, 1ull << 30)) >> 30) +
		   ((ALIGN(va + size, 1ull << 21) - ALIGN_DOWN(va, 1ull << 21)) >> 21);

	op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
						 sizeof(*op_ctx->rsvd_page_tables.pages),
						 GFP_KERNEL);
	if (!op_ctx->rsvd_page_tables.pages) {
		ret = -ENOMEM;
		goto err_cleanup;
	}

	ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
				    op_ctx->rsvd_page_tables.pages);
	op_ctx->rsvd_page_tables.count = ret;
	if (ret != pt_count) {
		ret = -ENOMEM;
		goto err_cleanup;
	}

	/* Insert BO into the extobj list last, when we know nothing can fail. */
	dma_resv_lock(panthor_vm_resv(vm), NULL);
	drm_gpuvm_bo_extobj_add(op_ctx->map.vm_bo);
	dma_resv_unlock(panthor_vm_resv(vm));

	return 0;

err_cleanup:
	panthor_vm_cleanup_op_ctx(op_ctx, vm);
	return ret;
}

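/* Pre-allocate what an unmap operation might need: VMAs for the remap case,
 * and L3 page tables in case 2M blocks have to be split.
 */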
static int panthor_vm_prepare_unmap_op_ctx(struct panthor_vm_op_ctx *op_ctx,
					   struct panthor_vm *vm,
					   u64 va, u64 size)
{
	u32 pt_count = 0;
	int ret;

	memset(op_ctx, 0, sizeof(*op_ctx));
	op_ctx->va.range = size;
	op_ctx->va.addr = va;
	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP;

	/* Pre-allocate L3 page tables to account for the split-2M-block
	 * situation on unmap.
	 */
	if (va != ALIGN(va, SZ_2M))
		pt_count++;

	if (va + size != ALIGN(va + size, SZ_2M) &&
	    ALIGN(va + size, SZ_2M) != ALIGN(va, SZ_2M))
		pt_count++;

	ret = panthor_vm_op_ctx_prealloc_vmas(op_ctx);
	if (ret)
		goto err_cleanup;

	if (pt_count) {
		op_ctx->rsvd_page_tables.pages = kcalloc(pt_count,
							 sizeof(*op_ctx->rsvd_page_tables.pages),
							 GFP_KERNEL);
		if (!op_ctx->rsvd_page_tables.pages) {
			ret = -ENOMEM;
			goto err_cleanup;
		}

		ret = kmem_cache_alloc_bulk(pt_cache, GFP_KERNEL, pt_count,
					    op_ctx->rsvd_page_tables.pages);
		if (ret != pt_count) {
			ret = -ENOMEM;
			goto err_cleanup;
		}
		op_ctx->rsvd_page_tables.count = pt_count;
	}

	return 0;

err_cleanup:
	panthor_vm_cleanup_op_ctx(op_ctx, vm);
	return ret;
}

static void panthor_vm_prepare_sync_only_op_ctx(struct panthor_vm_op_ctx *op_ctx,
						struct panthor_vm *vm)
{
	memset(op_ctx, 0, sizeof(*op_ctx));
	op_ctx->flags = DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY;
}

/**
 * panthor_vm_get_bo_for_va() - Get the GEM object mapped at a virtual address
 * @vm: VM to look into.
 * @va: Virtual address to search for.
 * @bo_offset: Offset of the GEM object mapped at this virtual address.
 * Only valid on success.
 *
 * The object returned by this function might no longer be mapped when the
 * function returns. It's the caller's responsibility to ensure there are no
 * concurrent map/unmap operations making the returned value invalid, or
 * make sure it doesn't matter if the object is no longer mapped.
 *
 * Return: A valid pointer on success, an ERR_PTR() otherwise.
 */
struct panthor_gem_object *
panthor_vm_get_bo_for_va(struct panthor_vm *vm, u64 va, u64 *bo_offset)
{
	struct panthor_gem_object *bo = ERR_PTR(-ENOENT);
	struct drm_gpuva *gpuva;
	struct panthor_vma *vma;

	/* Take the VM lock to prevent concurrent map/unmap operations. */
	mutex_lock(&vm->op_lock);
	gpuva = drm_gpuva_find_first(&vm->base, va, 1);
	vma = gpuva ? container_of(gpuva, struct panthor_vma, base) : NULL;
	if (vma && vma->base.gem.obj) {
		drm_gem_object_get(vma->base.gem.obj);
		bo = to_panthor_bo(vma->base.gem.obj);
		*bo_offset = vma->base.gem.offset + (va - vma->base.va.addr);
	}
	mutex_unlock(&vm->op_lock);

	return bo;
}

#define PANTHOR_VM_MIN_KERNEL_VA_SIZE SZ_256M

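/* Pick how much of the VA space is handed to userspace; whatever is left is
 * reserved for kernel objects, with a minimum of PANTHOR_VM_MIN_KERNEL_VA_SIZE.
 */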
static u64
panthor_vm_create_get_user_va_range(const struct drm_panthor_vm_create *args,
				    u64 full_va_range)
{
	u64 user_va_range;

	/* Make sure we have a minimum amount of VA space for kernel objects. */
	if (full_va_range < PANTHOR_VM_MIN_KERNEL_VA_SIZE)
		return 0;

	if (args->user_va_range) {
		/* Use the user provided value if != 0. */
		user_va_range = args->user_va_range;
	} else if (TASK_SIZE_OF(current) < full_va_range) {
		/* If the task VM size is smaller than the GPU VA range, pick this
		 * as our default user VA range, so userspace can CPU/GPU map buffers
		 * at the same address.
		 */
		user_va_range = TASK_SIZE_OF(current);
	} else {
		/* If the GPU VA range is smaller than the task VM size, we
		 * just have to live with the fact we won't be able to map
		 * all buffers at the same GPU/CPU address.
		 *
		 * If the GPU VA range is bigger than 4G (more than 32-bit of
		 * VA), we split the range in two, and assign half of it to
		 * the user and the other half to the kernel, if it's not, we
		 * keep the kernel VA space as small as possible.
		 */
		user_va_range = full_va_range > SZ_4G ?
				full_va_range / 2 :
				full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE;
	}

	if (full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE < user_va_range)
		user_va_range = full_va_range - PANTHOR_VM_MIN_KERNEL_VA_SIZE;

	return user_va_range;
}

#define PANTHOR_VM_CREATE_FLAGS 0

static int
panthor_vm_create_check_args(const struct panthor_device *ptdev,
			     const struct drm_panthor_vm_create *args,
			     u64 *kernel_va_start, u64 *kernel_va_range)
{
	u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features);
	u64 full_va_range = 1ull << va_bits;
	u64 user_va_range;

	if (args->flags & ~PANTHOR_VM_CREATE_FLAGS)
		return -EINVAL;

	user_va_range = panthor_vm_create_get_user_va_range(args, full_va_range);
	if (!user_va_range || (args->user_va_range && args->user_va_range > user_va_range))
		return -EINVAL;

	/* Pick a kernel VA range that's a power of two, to have a clear split. */
	*kernel_va_range = rounddown_pow_of_two(full_va_range - user_va_range);
	*kernel_va_start = full_va_range - *kernel_va_range;
	return 0;
}

/*
 * Only 32 VMs per open file. If that becomes a limiting factor, we can
 * increase this number.
 */
#define PANTHOR_MAX_VMS_PER_FILE 32

/**
 * panthor_vm_pool_create_vm() - Create a VM
 * @ptdev: The panthor device
 * @pool: The VM pool to create this VM in.
 * @args: VM creation args.
 *
 * Return: a positive VM ID on success, a negative error code otherwise.
 */
int panthor_vm_pool_create_vm(struct panthor_device *ptdev,
			      struct panthor_vm_pool *pool,
			      struct drm_panthor_vm_create *args)
{
	u64 kernel_va_start, kernel_va_range;
	struct panthor_vm *vm;
	int ret;
	u32 id;

	ret = panthor_vm_create_check_args(ptdev, args, &kernel_va_start, &kernel_va_range);
	if (ret)
		return ret;

	vm = panthor_vm_create(ptdev, false, kernel_va_start, kernel_va_range,
			       kernel_va_start, kernel_va_range);
	if (IS_ERR(vm))
		return PTR_ERR(vm);

	ret = xa_alloc(&pool->xa, &id, vm,
		       XA_LIMIT(1, PANTHOR_MAX_VMS_PER_FILE), GFP_KERNEL);

	if (ret) {
		panthor_vm_put(vm);
		return ret;
	}

	args->user_va_range = kernel_va_start;
	return id;
}

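/* Kill all mappings and release the heap pool. The VM object itself is only
 * freed when the last reference is dropped.
 */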
| 1499 | static void panthor_vm_destroy(struct panthor_vm *vm) |
| 1500 | { |
| 1501 | if (!vm) |
| 1502 | return; |
| 1503 | |
| 1504 | vm->destroyed = true; |
| 1505 | |
| 1506 | mutex_lock(&vm->heaps.lock); |
| 1507 | panthor_heap_pool_destroy(pool: vm->heaps.pool); |
| 1508 | vm->heaps.pool = NULL; |
| 1509 | mutex_unlock(lock: &vm->heaps.lock); |
| 1510 | |
| 1511 | drm_WARN_ON(&vm->ptdev->base, |
| 1512 | panthor_vm_unmap_range(vm, vm->base.mm_start, vm->base.mm_range)); |
| 1513 | panthor_vm_put(vm); |
| 1514 | } |
| 1515 | |
| 1516 | /** |
| 1517 | * panthor_vm_pool_destroy_vm() - Destroy a VM. |
| 1518 | * @pool: VM pool. |
| 1519 | * @handle: VM handle. |
| 1520 | * |
| 1521 | * This function doesn't free the VM object or its resources, it just kills |
| 1522 | * all mappings, and makes sure nothing can be mapped after that point. |
| 1523 | * |
| 1524 | * If there was any active jobs at the time this function is called, these |
| 1525 | * jobs should experience page faults and be killed as a result. |
| 1526 | * |
| 1527 | * The VM resources are freed when the last reference on the VM object is |
| 1528 | * dropped. |
| 1529 | * |
| 1530 | * Return: %0 for success, negative errno value for failure |
| 1531 | */ |
| 1532 | int panthor_vm_pool_destroy_vm(struct panthor_vm_pool *pool, u32 handle) |
| 1533 | { |
| 1534 | struct panthor_vm *vm; |
| 1535 | |
| 1536 | vm = xa_erase(&pool->xa, index: handle); |
| 1537 | |
| 1538 | panthor_vm_destroy(vm); |
| 1539 | |
| 1540 | return vm ? 0 : -EINVAL; |
| 1541 | } |
| 1542 | |
| 1543 | /** |
| 1544 | * panthor_vm_pool_get_vm() - Retrieve VM object bound to a VM handle |
| 1545 | * @pool: VM pool to check. |
| 1546 | * @handle: Handle of the VM to retrieve. |
| 1547 | * |
| 1548 | * Return: A valid pointer if the VM exists, NULL otherwise. |
| 1549 | */ |
| 1550 | struct panthor_vm * |
| 1551 | panthor_vm_pool_get_vm(struct panthor_vm_pool *pool, u32 handle) |
| 1552 | { |
| 1553 | struct panthor_vm *vm; |
| 1554 | |
| 1555 | xa_lock(&pool->xa); |
| 1556 | vm = panthor_vm_get(vm: xa_load(&pool->xa, index: handle)); |
| 1557 | xa_unlock(&pool->xa); |
| 1558 | |
| 1559 | return vm; |
| 1560 | } |
| 1561 | |
| 1562 | /** |
| 1563 | * panthor_vm_pool_destroy() - Destroy a VM pool. |
| 1564 | * @pfile: File. |
| 1565 | * |
| 1566 | * Destroy all VMs in the pool, and release the pool resources. |
| 1567 | * |
| 1568 | * Note that VMs can outlive the pool they were created from if other |
| 1569 | * objects hold a reference to these VMs.
| 1570 | */ |
| 1571 | void panthor_vm_pool_destroy(struct panthor_file *pfile) |
| 1572 | { |
| 1573 | struct panthor_vm *vm; |
| 1574 | unsigned long i; |
| 1575 | |
| 1576 | if (!pfile->vms) |
| 1577 | return; |
| 1578 | |
| 1579 | xa_for_each(&pfile->vms->xa, i, vm) |
| 1580 | panthor_vm_destroy(vm); |
| 1581 | |
| 1582 | xa_destroy(&pfile->vms->xa); |
| 1583 | kfree(pfile->vms);
| 1584 | } |
| 1585 | |
| 1586 | /** |
| 1587 | * panthor_vm_pool_create() - Create a VM pool |
| 1588 | * @pfile: File. |
| 1589 | * |
| 1590 | * Return: 0 on success, a negative error code otherwise. |
| 1591 | */ |
| 1592 | int panthor_vm_pool_create(struct panthor_file *pfile) |
| 1593 | { |
| 1594 | pfile->vms = kzalloc(sizeof(*pfile->vms), GFP_KERNEL); |
| 1595 | if (!pfile->vms) |
| 1596 | return -ENOMEM; |
| 1597 | |
| 1598 | xa_init_flags(&pfile->vms->xa, XA_FLAGS_ALLOC1);
| 1599 | return 0; |
| 1600 | } |
| 1601 | |
| 1602 | /* dummy TLB ops, the real TLB flush happens in panthor_vm_flush_range() */ |
| 1603 | static void mmu_tlb_flush_all(void *cookie) |
| 1604 | { |
| 1605 | } |
| 1606 | |
| 1607 | static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, void *cookie) |
| 1608 | { |
| 1609 | } |
| 1610 | |
| 1611 | static const struct iommu_flush_ops mmu_tlb_ops = { |
| 1612 | .tlb_flush_all = mmu_tlb_flush_all, |
| 1613 | .tlb_flush_walk = mmu_tlb_flush_walk, |
| 1614 | }; |
| 1615 | |
| 1616 | static const char *access_type_name(struct panthor_device *ptdev, |
| 1617 | u32 fault_status) |
| 1618 | { |
| 1619 | switch (fault_status & AS_FAULTSTATUS_ACCESS_TYPE_MASK) { |
| 1620 | case AS_FAULTSTATUS_ACCESS_TYPE_ATOMIC: |
| 1621 | return "ATOMIC";
| 1622 | case AS_FAULTSTATUS_ACCESS_TYPE_READ:
| 1623 | return "READ";
| 1624 | case AS_FAULTSTATUS_ACCESS_TYPE_WRITE:
| 1625 | return "WRITE";
| 1626 | case AS_FAULTSTATUS_ACCESS_TYPE_EX:
| 1627 | return "EXECUTE";
| 1628 | default: |
| 1629 | drm_WARN_ON(&ptdev->base, 1); |
| 1630 | return NULL; |
| 1631 | } |
| 1632 | } |
| 1633 | |
| 1634 | static void panthor_mmu_irq_handler(struct panthor_device *ptdev, u32 status) |
| 1635 | { |
| 1636 | bool has_unhandled_faults = false; |
| 1637 | |
| 1638 | status = panthor_mmu_fault_mask(ptdev, status);
| 1639 | while (status) { |
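|      | /* The interrupt status register exposes one bit per AS in its lower half
|      | * and (assuming the same layout as older Mali MMUs) the matching bus-fault
|      | * bit in its upper half; folding the two halves before ffs() picks the
|      | * lowest AS with a pending fault.
|      | */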
| 1640 | u32 as = ffs(status | (status >> 16)) - 1; |
| 1641 | u32 mask = panthor_mmu_as_fault_mask(ptdev, as); |
| 1642 | u32 new_int_mask; |
| 1643 | u64 addr; |
| 1644 | u32 fault_status; |
| 1645 | u32 exception_type; |
| 1646 | u32 access_type; |
| 1647 | u32 source_id; |
| 1648 | |
| 1649 | fault_status = gpu_read(ptdev, AS_FAULTSTATUS(as)); |
| 1650 | addr = gpu_read64(ptdev, AS_FAULTADDRESS(as)); |
| 1651 | |
| 1652 | /* decode the fault status */ |
| 1653 | exception_type = fault_status & 0xFF; |
| 1654 | access_type = (fault_status >> 8) & 0x3; |
| 1655 | source_id = (fault_status >> 16); |
| 1656 | |
| 1657 | mutex_lock(&ptdev->mmu->as.slots_lock); |
| 1658 | |
| 1659 | ptdev->mmu->as.faulty_mask |= mask; |
| 1660 | new_int_mask = |
| 1661 | panthor_mmu_fault_mask(ptdev, ~ptdev->mmu->as.faulty_mask);
| 1662 | |
| 1663 | /* terminal fault, print info about the fault */ |
| 1664 | drm_err(&ptdev->base, |
| 1665 | "Unhandled Page fault in AS%d at VA 0x%016llX\n" |
| 1666 | "raw fault status: 0x%X\n" |
| 1667 | "decoded fault status: %s\n" |
| 1668 | "exception type 0x%X: %s\n" |
| 1669 | "access type 0x%X: %s\n" |
| 1670 | "source id 0x%X\n",
| 1671 | as, addr,
| 1672 | fault_status,
| 1673 | (fault_status & (1 << 10) ? "DECODER FAULT" : "SLAVE FAULT"),
| 1674 | exception_type, panthor_exception_name(ptdev, exception_type), |
| 1675 | access_type, access_type_name(ptdev, fault_status), |
| 1676 | source_id); |
| 1677 | |
| 1678 | /* We don't handle VM faults at the moment, so let's just clear the |
| 1679 | * interrupt and let the writer/reader crash. |
| 1680 | * Note that COMPLETED irqs are never cleared, but this is fine |
| 1681 | * because they are always masked. |
| 1682 | */ |
| 1683 | gpu_write(ptdev, MMU_INT_CLEAR, mask);
| 1684 | |
| 1685 | /* Ignore MMU interrupts on this AS until it's been |
| 1686 | * re-enabled. |
| 1687 | */ |
| 1688 | ptdev->mmu->irq.mask = new_int_mask; |
| 1689 | |
| 1690 | if (ptdev->mmu->as.slots[as].vm) |
| 1691 | ptdev->mmu->as.slots[as].vm->unhandled_fault = true; |
| 1692 | |
| 1693 | /* Disable the MMU to kill jobs on this AS. */ |
| 1694 | panthor_mmu_as_disable(ptdev, as);
| 1695 | mutex_unlock(&ptdev->mmu->as.slots_lock);
| 1696 | |
| 1697 | status &= ~mask; |
| 1698 | has_unhandled_faults = true; |
| 1699 | } |
| 1700 | |
| 1701 | if (has_unhandled_faults) |
| 1702 | panthor_sched_report_mmu_fault(ptdev); |
| 1703 | } |
| 1704 | PANTHOR_IRQ_HANDLER(mmu, MMU, panthor_mmu_irq_handler); |
| 1705 | |
| 1706 | /** |
| 1707 | * panthor_mmu_suspend() - Suspend the MMU logic |
| 1708 | * @ptdev: Device. |
| 1709 | * |
| 1710 | * All we do here is de-assign the AS slots on all active VMs, so things |
| 1711 | * get flushed to main memory, and no further access to these VMs is
| 1712 | * possible. |
| 1713 | * |
| 1714 | * We also suspend the MMU IRQ. |
| 1715 | */ |
| 1716 | void panthor_mmu_suspend(struct panthor_device *ptdev) |
| 1717 | { |
| 1718 | mutex_lock(&ptdev->mmu->as.slots_lock); |
| 1719 | for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { |
| 1720 | struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; |
| 1721 | |
| 1722 | if (vm) { |
| 1723 | drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i)); |
| 1724 | panthor_vm_release_as_locked(vm); |
| 1725 | } |
| 1726 | } |
| 1727 | mutex_unlock(&ptdev->mmu->as.slots_lock);
| 1728 | 
| 1729 | panthor_mmu_irq_suspend(&ptdev->mmu->irq);
| 1730 | } |
| 1731 | |
| 1732 | /** |
| 1733 | * panthor_mmu_resume() - Resume the MMU logic |
| 1734 | * @ptdev: Device. |
| 1735 | * |
| 1736 | * Resume the IRQ. |
| 1737 | * |
| 1738 | * We don't re-enable previously active VMs. We assume other parts of the |
| 1739 | * driver will call panthor_vm_active() on the VMs they intend to use. |
| 1740 | */ |
| 1741 | void panthor_mmu_resume(struct panthor_device *ptdev) |
| 1742 | { |
| 1743 | mutex_lock(&ptdev->mmu->as.slots_lock); |
| 1744 | ptdev->mmu->as.alloc_mask = 0; |
| 1745 | ptdev->mmu->as.faulty_mask = 0; |
| 1746 | mutex_unlock(&ptdev->mmu->as.slots_lock);
| 1747 | 
| 1748 | panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
| 1749 | } |
| 1750 | |
| 1751 | /** |
| 1752 | * panthor_mmu_pre_reset() - Prepare for a reset |
| 1753 | * @ptdev: Device. |
| 1754 | * |
| 1755 | * Suspend the IRQ, and make sure all VM_BIND queues are stopped, so we |
| 1756 | * don't get asked to do a VM operation while the GPU is down. |
| 1757 | * |
| 1758 | * We don't cleanly shutdown the AS slots here, because the reset might |
| 1759 | * come from an AS_ACTIVE_BIT stuck situation. |
| 1760 | */ |
| 1761 | void panthor_mmu_pre_reset(struct panthor_device *ptdev) |
| 1762 | { |
| 1763 | struct panthor_vm *vm; |
| 1764 | |
| 1765 | panthor_mmu_irq_suspend(&ptdev->mmu->irq);
| 1766 | 
| 1767 | mutex_lock(&ptdev->mmu->vm.lock);
| 1768 | ptdev->mmu->vm.reset_in_progress = true;
| 1769 | list_for_each_entry(vm, &ptdev->mmu->vm.list, node)
| 1770 | panthor_vm_stop(vm);
| 1771 | mutex_unlock(&ptdev->mmu->vm.lock);
| 1772 | } |
| 1773 | |
| 1774 | /** |
| 1775 | * panthor_mmu_post_reset() - Restore things after a reset |
| 1776 | * @ptdev: Device. |
| 1777 | * |
| 1778 | * Put the MMU logic back in action after a reset. That implies resuming the |
| 1779 | * IRQ and re-enabling the VM_BIND queues. |
| 1780 | */ |
| 1781 | void panthor_mmu_post_reset(struct panthor_device *ptdev) |
| 1782 | { |
| 1783 | struct panthor_vm *vm; |
| 1784 | |
| 1785 | mutex_lock(&ptdev->mmu->as.slots_lock); |
| 1786 | |
| 1787 | /* Now that the reset is effective, we can assume that none of the |
| 1788 | * AS slots are set up, and clear the faulty flags too.
| 1789 | */ |
| 1790 | ptdev->mmu->as.alloc_mask = 0; |
| 1791 | ptdev->mmu->as.faulty_mask = 0; |
| 1792 | |
| 1793 | for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) { |
| 1794 | struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm; |
| 1795 | |
| 1796 | if (vm) |
| 1797 | panthor_vm_release_as_locked(vm); |
| 1798 | } |
| 1799 | |
| 1800 | mutex_unlock(&ptdev->mmu->as.slots_lock);
| 1801 | 
| 1802 | panthor_mmu_irq_resume(&ptdev->mmu->irq, panthor_mmu_fault_mask(ptdev, ~0));
| 1803 | 
| 1804 | /* Restart the VM_BIND queues. */
| 1805 | mutex_lock(&ptdev->mmu->vm.lock);
| 1806 | list_for_each_entry(vm, &ptdev->mmu->vm.list, node) {
| 1807 | panthor_vm_start(vm);
| 1808 | }
| 1809 | ptdev->mmu->vm.reset_in_progress = false;
| 1810 | mutex_unlock(&ptdev->mmu->vm.lock);
| 1811 | } |
| 1812 | |
| 1813 | static void panthor_vm_free(struct drm_gpuvm *gpuvm) |
| 1814 | { |
| 1815 | struct panthor_vm *vm = container_of(gpuvm, struct panthor_vm, base); |
| 1816 | struct panthor_device *ptdev = vm->ptdev; |
| 1817 | |
| 1818 | mutex_lock(&vm->heaps.lock); |
| 1819 | if (drm_WARN_ON(&ptdev->base, vm->heaps.pool)) |
| 1820 | panthor_heap_pool_destroy(vm->heaps.pool);
| 1821 | mutex_unlock(&vm->heaps.lock);
| 1822 | mutex_destroy(&vm->heaps.lock);
| 1823 | 
| 1824 | mutex_lock(&ptdev->mmu->vm.lock);
| 1825 | list_del(&vm->node);
| 1826 | /* Restore the scheduler state so we can call drm_sched_entity_destroy()
| 1827 | * and drm_sched_fini(). If we get there, that means we have no job left
| 1828 | * and no new jobs can be queued, so we can start the scheduler without
| 1829 | * risking interfering with the reset.
| 1830 | */
| 1831 | if (ptdev->mmu->vm.reset_in_progress)
| 1832 | panthor_vm_start(vm);
| 1833 | mutex_unlock(&ptdev->mmu->vm.lock);
| 1834 | 
| 1835 | drm_sched_entity_destroy(&vm->entity);
| 1836 | drm_sched_fini(&vm->sched);
| 1837 | |
| 1838 | mutex_lock(&ptdev->mmu->as.slots_lock); |
| 1839 | if (vm->as.id >= 0) { |
| 1840 | int cookie; |
| 1841 | |
| 1842 | if (drm_dev_enter(&ptdev->base, &cookie)) {
| 1843 | panthor_mmu_as_disable(ptdev, vm->as.id);
| 1844 | drm_dev_exit(cookie);
| 1845 | }
| 1846 | 
| 1847 | ptdev->mmu->as.slots[vm->as.id].vm = NULL;
| 1848 | clear_bit(vm->as.id, &ptdev->mmu->as.alloc_mask);
| 1849 | list_del(&vm->as.lru_node);
| 1850 | }
| 1851 | mutex_unlock(&ptdev->mmu->as.slots_lock);
| 1852 | 
| 1853 | free_io_pgtable_ops(vm->pgtbl_ops);
| 1854 | 
| 1855 | drm_mm_takedown(&vm->mm);
| 1856 | kfree(vm);
| 1857 | } |
| 1858 | |
| 1859 | /** |
| 1860 | * panthor_vm_put() - Release a reference on a VM |
| 1861 | * @vm: VM to release the reference on. Can be NULL. |
| 1862 | */ |
| 1863 | void panthor_vm_put(struct panthor_vm *vm) |
| 1864 | { |
| 1865 | drm_gpuvm_put(vm ? &vm->base : NULL);
| 1866 | } |
| 1867 | |
| 1868 | /** |
| 1869 | * panthor_vm_get() - Get a VM reference |
| 1870 | * @vm: VM to get the reference on. Can be NULL. |
| 1871 | * |
| 1872 | * Return: @vm value. |
| 1873 | */ |
| 1874 | struct panthor_vm *panthor_vm_get(struct panthor_vm *vm) |
| 1875 | { |
| 1876 | if (vm) |
| 1877 | drm_gpuvm_get(&vm->base);
| 1878 | |
| 1879 | return vm; |
| 1880 | } |
| 1881 | |
| 1882 | /** |
| 1883 | * panthor_vm_get_heap_pool() - Get the heap pool attached to a VM |
| 1884 | * @vm: VM to query the heap pool on. |
| 1885 | * @create: True if the heap pool should be created when it doesn't exist. |
| 1886 | * |
| 1887 | * Heap pools are per-VM. This function allows one to retrieve the heap pool |
| 1888 | * attached to a VM. |
| 1889 | * |
| 1890 | * If no heap pool exists yet, and @create is true, we create one. |
| 1891 | * |
| 1892 | * The returned panthor_heap_pool should be released with panthor_heap_pool_put(). |
| 1893 | * |
| 1894 | * Return: A valid pointer on success, an ERR_PTR() otherwise. |
| 1895 | */ |
| 1896 | struct panthor_heap_pool *panthor_vm_get_heap_pool(struct panthor_vm *vm, bool create) |
| 1897 | { |
| 1898 | struct panthor_heap_pool *pool; |
| 1899 | |
| 1900 | mutex_lock(&vm->heaps.lock); |
| 1901 | if (!vm->heaps.pool && create) { |
| 1902 | if (vm->destroyed) |
| 1903 | pool = ERR_PTR(-EINVAL);
| 1904 | else
| 1905 | pool = panthor_heap_pool_create(vm->ptdev, vm);
| 1906 | 
| 1907 | if (!IS_ERR(pool))
| 1908 | vm->heaps.pool = panthor_heap_pool_get(pool);
| 1909 | } else {
| 1910 | pool = panthor_heap_pool_get(vm->heaps.pool);
| 1911 | if (!pool)
| 1912 | pool = ERR_PTR(-ENOENT);
| 1913 | }
| 1914 | mutex_unlock(&vm->heaps.lock);
| 1915 | |
| 1916 | return pool; |
| 1917 | } |
| 1918 | |
| 1919 | /** |
| 1920 | * panthor_vm_heaps_sizes() - Calculate size of all heap chunks across all |
| 1921 | * heaps over all the heap pools in a VM |
| 1922 | * @pfile: File. |
| 1923 | * @stats: Memory stats to be updated. |
| 1924 | * |
| 1925 | * Calculate all heap chunk sizes in all heap pools bound to a VM. If the VM |
| 1926 | * is active, record the size as active as well. |
| 1927 | */ |
| 1928 | void panthor_vm_heaps_sizes(struct panthor_file *pfile, struct drm_memory_stats *stats) |
| 1929 | { |
| 1930 | struct panthor_vm *vm; |
| 1931 | unsigned long i; |
| 1932 | |
| 1933 | if (!pfile->vms) |
| 1934 | return; |
| 1935 | |
| 1936 | xa_lock(&pfile->vms->xa); |
| 1937 | xa_for_each(&pfile->vms->xa, i, vm) { |
| 1938 | size_t size = panthor_heap_pool_size(vm->heaps.pool);
| 1939 | stats->resident += size; |
| 1940 | if (vm->as.id >= 0) |
| 1941 | stats->active += size; |
| 1942 | } |
| 1943 | xa_unlock(&pfile->vms->xa); |
| 1944 | } |
| 1945 | |
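|      | /* Illustrative examples of the translation done below (MAIR values are the
|      | * usual Linux settings, not something this driver programs): a MAIR attribute
|      | * of 0xff (Normal memory, inner/outer write-back, read/write allocate) becomes
|      | * a write-back MEMATTR entry, while 0x04 (Device-nGnRE) or anything lacking
|      | * write-back caching on either level degrades to the non-cacheable encoding.
|      | */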
| 1946 | static u64 mair_to_memattr(u64 mair, bool coherent) |
| 1947 | { |
| 1948 | u64 memattr = 0; |
| 1949 | u32 i; |
| 1950 | |
| 1951 | for (i = 0; i < 8; i++) { |
| 1952 | u8 in_attr = mair >> (8 * i), out_attr; |
| 1953 | u8 outer = in_attr >> 4, inner = in_attr & 0xf; |
| 1954 | |
| 1955 | /* For caching to be enabled, inner and outer caching policy |
| 1956 | * have to be both write-back, if one of them is write-through |
| 1957 | * or non-cacheable, we just choose non-cacheable. Device |
| 1958 | * memory is also translated to non-cacheable. |
| 1959 | */ |
| 1960 | if (!(outer & 3) || !(outer & 4) || !(inner & 4)) { |
| 1961 | out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_NC | |
| 1962 | AS_MEMATTR_AARCH64_SH_MIDGARD_INNER | |
| 1963 | AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(false, false); |
| 1964 | } else { |
| 1965 | out_attr = AS_MEMATTR_AARCH64_INNER_OUTER_WB | |
| 1966 | AS_MEMATTR_AARCH64_INNER_ALLOC_EXPL(inner & 1, inner & 2); |
| 1967 | /* Use SH_MIDGARD_INNER mode when device isn't coherent, |
| 1968 | * so SH_IS, which is used when IOMMU_CACHE is set, maps |
| 1969 | * to Mali's internal-shareable mode. As per the Mali |
| 1970 | * Spec, inner and outer-shareable modes aren't allowed |
| 1971 | * for WB memory when coherency is disabled. |
| 1972 | * Use SH_CPU_INNER mode when coherency is enabled, so |
| 1973 | * that SH_IS actually maps to the standard definition of |
| 1974 | * inner-shareable. |
| 1975 | */ |
| 1976 | if (!coherent) |
| 1977 | out_attr |= AS_MEMATTR_AARCH64_SH_MIDGARD_INNER; |
| 1978 | else |
| 1979 | out_attr |= AS_MEMATTR_AARCH64_SH_CPU_INNER; |
| 1980 | } |
| 1981 | |
| 1982 | memattr |= (u64)out_attr << (8 * i); |
| 1983 | } |
| 1984 | |
| 1985 | return memattr; |
| 1986 | } |
| 1987 | |
| 1988 | static void panthor_vma_link(struct panthor_vm *vm, |
| 1989 | struct panthor_vma *vma, |
| 1990 | struct drm_gpuvm_bo *vm_bo) |
| 1991 | { |
| 1992 | struct panthor_gem_object *bo = to_panthor_bo(vma->base.gem.obj);
| 1993 | 
| 1994 | mutex_lock(&bo->base.base.gpuva.lock);
| 1995 | drm_gpuva_link(&vma->base, vm_bo);
| 1996 | mutex_unlock(&bo->base.base.gpuva.lock);
| 1997 | } |
| 1998 | |
| 1999 | static void panthor_vma_unlink(struct panthor_vma *vma) |
| 2000 | { |
| 2001 | drm_gpuva_unlink_defer(&vma->base);
| 2002 | kfree(vma);
| 2003 | }
| 2004 | 
| 2005 | static void panthor_vma_init(struct panthor_vma *vma, u32 flags)
| 2006 | {
| 2007 | INIT_LIST_HEAD(&vma->node);
| 2008 | vma->flags = flags; |
| 2009 | } |
| 2010 | |
| 2011 | #define PANTHOR_VM_MAP_FLAGS \ |
| 2012 | (DRM_PANTHOR_VM_BIND_OP_MAP_READONLY | \ |
| 2013 | DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC | \ |
| 2014 | DRM_PANTHOR_VM_BIND_OP_MAP_UNCACHED) |
| 2015 | |
| 2016 | static int panthor_gpuva_sm_step_map(struct drm_gpuva_op *op, void *priv) |
| 2017 | { |
| 2018 | struct panthor_vm *vm = priv; |
| 2019 | struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; |
| 2020 | struct panthor_vma *vma = panthor_vm_op_ctx_get_vma(op_ctx); |
| 2021 | int ret; |
| 2022 | |
| 2023 | if (!vma) |
| 2024 | return -EINVAL; |
| 2025 | |
| 2026 | panthor_vma_init(vma, op_ctx->flags & PANTHOR_VM_MAP_FLAGS);
| 2027 | 
| 2028 | ret = panthor_vm_map_pages(vm, op->map.va.addr, flags_to_prot(vma->flags),
| 2029 | op_ctx->map.sgt, op->map.gem.offset,
| 2030 | op->map.va.range);
| 2031 | if (ret) {
| 2032 | panthor_vm_op_ctx_return_vma(op_ctx, vma);
| 2033 | return ret;
| 2034 | }
| 2035 | 
| 2036 | drm_gpuva_map(&vm->base, &vma->base, &op->map);
| 2037 | panthor_vma_link(vm, vma, op_ctx->map.vm_bo);
| 2038 | 
| 2039 | drm_gpuvm_bo_put_deferred(op_ctx->map.vm_bo);
| 2040 | op_ctx->map.vm_bo = NULL; |
| 2041 | |
| 2042 | return 0; |
| 2043 | } |
| 2044 | |
| 2045 | static int panthor_gpuva_sm_step_remap(struct drm_gpuva_op *op, |
| 2046 | void *priv) |
| 2047 | { |
| 2048 | struct panthor_vma *unmap_vma = container_of(op->remap.unmap->va, struct panthor_vma, base); |
| 2049 | struct panthor_vm *vm = priv; |
| 2050 | struct panthor_vm_op_ctx *op_ctx = vm->op_ctx; |
| 2051 | struct panthor_vma *prev_vma = NULL, *next_vma = NULL; |
| 2052 | u64 unmap_start, unmap_range; |
| 2053 | int ret; |
| 2054 | |
| 2055 | drm_gpuva_op_remap_to_unmap_range(&op->remap, &unmap_start, &unmap_range);
| 2056 | ret = panthor_vm_unmap_pages(vm, unmap_start, unmap_range);
| 2057 | if (ret)
| 2058 | return ret;
| 2059 | 
| 2060 | if (op->remap.prev) {
| 2061 | prev_vma = panthor_vm_op_ctx_get_vma(op_ctx);
| 2062 | panthor_vma_init(prev_vma, unmap_vma->flags);
| 2063 | }
| 2064 | 
| 2065 | if (op->remap.next) {
| 2066 | next_vma = panthor_vm_op_ctx_get_vma(op_ctx);
| 2067 | panthor_vma_init(next_vma, unmap_vma->flags);
| 2068 | }
| 2069 | 
| 2070 | drm_gpuva_remap(prev_vma ? &prev_vma->base : NULL,
| 2071 | next_vma ? &next_vma->base : NULL,
| 2072 | &op->remap);
| 2073 | |
| 2074 | if (prev_vma) { |
| 2075 | /* panthor_vma_link() transfers the vm_bo ownership to |
| 2076 | * the VMA object. Since the vm_bo we're passing is still |
| 2077 | * owned by the old mapping which will be released when this |
| 2078 | * mapping is destroyed, we need to grab a ref here. |
| 2079 | */ |
| 2080 | panthor_vma_link(vm, prev_vma, op->remap.unmap->va->vm_bo);
| 2081 | }
| 2082 | 
| 2083 | if (next_vma) {
| 2084 | panthor_vma_link(vm, next_vma, op->remap.unmap->va->vm_bo);
| 2085 | }
| 2086 | 
| 2087 | panthor_vma_unlink(unmap_vma);
| 2088 | return 0; |
| 2089 | } |
| 2090 | |
| 2091 | static int panthor_gpuva_sm_step_unmap(struct drm_gpuva_op *op, |
| 2092 | void *priv) |
| 2093 | { |
| 2094 | struct panthor_vma *unmap_vma = container_of(op->unmap.va, struct panthor_vma, base); |
| 2095 | struct panthor_vm *vm = priv; |
| 2096 | int ret; |
| 2097 | |
| 2098 | ret = panthor_vm_unmap_pages(vm, unmap_vma->base.va.addr,
| 2099 | unmap_vma->base.va.range);
| 2100 | if (drm_WARN_ON(&vm->ptdev->base, ret))
| 2101 | return ret;
| 2102 | 
| 2103 | drm_gpuva_unmap(&op->unmap);
| 2104 | panthor_vma_unlink(unmap_vma);
| 2105 | return 0; |
| 2106 | } |
| 2107 | |
| 2108 | static const struct drm_gpuvm_ops panthor_gpuvm_ops = { |
| 2109 | .vm_free = panthor_vm_free, |
| 2110 | .vm_bo_free = panthor_vm_bo_free, |
| 2111 | .sm_step_map = panthor_gpuva_sm_step_map, |
| 2112 | .sm_step_remap = panthor_gpuva_sm_step_remap, |
| 2113 | .sm_step_unmap = panthor_gpuva_sm_step_unmap, |
| 2114 | }; |
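|      | 
|      | /* Note on the sm_step_* hooks above: drm_gpuvm_sm_map()/drm_gpuvm_sm_unmap()
|      | * walk the mappings overlapping the requested range and split the request into
|      | * map, remap and unmap steps. A map over an existing mapping is thus seen here
|      | * as a remap (with optional prev/next remainders) plus a map, each step being
|      | * turned into page-table updates by the callbacks above.
|      | */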
| 2115 | |
| 2116 | /** |
| 2117 | * panthor_vm_resv() - Get the dma_resv object attached to a VM. |
| 2118 | * @vm: VM to get the dma_resv of. |
| 2119 | * |
| 2120 | * Return: A dma_resv object. |
| 2121 | */ |
| 2122 | struct dma_resv *panthor_vm_resv(struct panthor_vm *vm) |
| 2123 | { |
| 2124 | return drm_gpuvm_resv(&vm->base); |
| 2125 | } |
| 2126 | |
| 2127 | struct drm_gem_object *panthor_vm_root_gem(struct panthor_vm *vm) |
| 2128 | { |
| 2129 | if (!vm) |
| 2130 | return NULL; |
| 2131 | |
| 2132 | return vm->base.r_obj; |
| 2133 | } |
| 2134 | |
| 2135 | static int |
| 2136 | panthor_vm_exec_op(struct panthor_vm *vm, struct panthor_vm_op_ctx *op, |
| 2137 | bool flag_vm_unusable_on_failure) |
| 2138 | { |
| 2139 | u32 op_type = op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK; |
| 2140 | int ret; |
| 2141 | |
| 2142 | if (op_type == DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY) |
| 2143 | return 0; |
| 2144 | |
| 2145 | mutex_lock(&vm->op_lock); |
| 2146 | vm->op_ctx = op; |
| 2147 | switch (op_type) { |
| 2148 | case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: { |
| 2149 | const struct drm_gpuvm_map_req map_req = { |
| 2150 | .map.va.addr = op->va.addr, |
| 2151 | .map.va.range = op->va.range, |
| 2152 | .map.gem.obj = op->map.vm_bo->obj, |
| 2153 | .map.gem.offset = op->map.bo_offset, |
| 2154 | }; |
| 2155 | |
| 2156 | if (vm->unusable) { |
| 2157 | ret = -EINVAL; |
| 2158 | break; |
| 2159 | } |
| 2160 | |
| 2161 | ret = drm_gpuvm_sm_map(&vm->base, vm, &map_req);
| 2162 | break;
| 2163 | }
| 2164 | 
| 2165 | case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP:
| 2166 | ret = drm_gpuvm_sm_unmap(&vm->base, vm, op->va.addr, op->va.range);
| 2167 | break; |
| 2168 | |
| 2169 | default: |
| 2170 | ret = -EINVAL; |
| 2171 | break; |
| 2172 | } |
| 2173 | |
| 2174 | if (ret && flag_vm_unusable_on_failure) |
| 2175 | vm->unusable = true; |
| 2176 | |
| 2177 | vm->op_ctx = NULL; |
| 2178 | mutex_unlock(&vm->op_lock);
| 2179 | |
| 2180 | return ret; |
| 2181 | } |
| 2182 | |
| 2183 | static struct dma_fence * |
| 2184 | panthor_vm_bind_run_job(struct drm_sched_job *sched_job) |
| 2185 | { |
| 2186 | struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); |
| 2187 | bool cookie; |
| 2188 | int ret; |
| 2189 | |
| 2190 | /* Not only do we report an error whose result is propagated to the
| 2191 | * drm_sched finished fence, but we also flag the VM as unusable, because
| 2192 | * a failure in an async VM_BIND leaves the VM in an inconsistent state: it
| 2193 | * needs to be destroyed and recreated.
| 2194 | */ |
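|      | /* dma_fence_begin/end_signalling() are lockdep annotations marking the
|      | * section below as part of the dma-fence signalling critical path, so
|      | * sleeping locks or blocking allocations taken here get flagged.
|      | */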
| 2195 | cookie = dma_fence_begin_signalling(); |
| 2196 | ret = panthor_vm_exec_op(job->vm, &job->ctx, true);
| 2197 | dma_fence_end_signalling(cookie);
| 2198 | 
| 2199 | return ret ? ERR_PTR(ret) : NULL;
| 2200 | } |
| 2201 | |
| 2202 | static void panthor_vm_bind_job_release(struct kref *kref) |
| 2203 | { |
| 2204 | struct panthor_vm_bind_job *job = container_of(kref, struct panthor_vm_bind_job, refcount); |
| 2205 | |
| 2206 | if (job->base.s_fence) |
| 2207 | drm_sched_job_cleanup(&job->base);
| 2208 | 
| 2209 | panthor_vm_cleanup_op_ctx(&job->ctx, job->vm);
| 2210 | panthor_vm_put(job->vm);
| 2211 | kfree(job);
| 2212 | } |
| 2213 | |
| 2214 | /** |
| 2215 | * panthor_vm_bind_job_put() - Release a VM_BIND job reference |
| 2216 | * @sched_job: Job to release the reference on. |
| 2217 | */ |
| 2218 | void panthor_vm_bind_job_put(struct drm_sched_job *sched_job) |
| 2219 | { |
| 2220 | struct panthor_vm_bind_job *job = |
| 2221 | container_of(sched_job, struct panthor_vm_bind_job, base); |
| 2222 | |
| 2223 | if (sched_job) |
| 2224 | kref_put(&job->refcount, panthor_vm_bind_job_release);
| 2225 | } |
| 2226 | |
| 2227 | static void |
| 2228 | panthor_vm_bind_free_job(struct drm_sched_job *sched_job) |
| 2229 | { |
| 2230 | struct panthor_vm_bind_job *job = |
| 2231 | container_of(sched_job, struct panthor_vm_bind_job, base); |
| 2232 | |
| 2233 | drm_sched_job_cleanup(sched_job);
| 2234 | |
| 2235 | /* Do the heavy cleanups asynchronously, so we're out of the |
| 2236 | * dma-signaling path and can acquire dma-resv locks safely. |
| 2237 | */ |
| 2238 | queue_work(panthor_cleanup_wq, &job->cleanup_op_ctx_work);
| 2239 | } |
| 2240 | |
| 2241 | static enum drm_gpu_sched_stat |
| 2242 | panthor_vm_bind_timedout_job(struct drm_sched_job *sched_job) |
| 2243 | { |
| 2244 | WARN(1, "VM_BIND ops are synchronous for now, there should be no timeout!");
| 2245 | return DRM_GPU_SCHED_STAT_RESET; |
| 2246 | } |
| 2247 | |
| 2248 | static const struct drm_sched_backend_ops panthor_vm_bind_ops = { |
| 2249 | .run_job = panthor_vm_bind_run_job, |
| 2250 | .free_job = panthor_vm_bind_free_job, |
| 2251 | .timedout_job = panthor_vm_bind_timedout_job, |
| 2252 | }; |
| 2253 | |
| 2254 | /** |
| 2255 | * panthor_vm_create() - Create a VM |
| 2256 | * @ptdev: Device. |
| 2257 | * @for_mcu: True if this is the FW MCU VM. |
| 2258 | * @kernel_va_start: Start of the range reserved for kernel BO mapping. |
| 2259 | * @kernel_va_size: Size of the range reserved for kernel BO mapping. |
| 2260 | * @auto_kernel_va_start: Start of the auto-VA kernel range. |
| 2261 | * @auto_kernel_va_size: Size of the auto-VA kernel range. |
| 2262 | * |
| 2263 | * Return: A valid pointer on success, an ERR_PTR() otherwise. |
| 2264 | */ |
| 2265 | struct panthor_vm * |
| 2266 | panthor_vm_create(struct panthor_device *ptdev, bool for_mcu, |
| 2267 | u64 kernel_va_start, u64 kernel_va_size, |
| 2268 | u64 auto_kernel_va_start, u64 auto_kernel_va_size) |
| 2269 | { |
| 2270 | u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); |
| 2271 | u32 pa_bits = GPU_MMU_FEATURES_PA_BITS(ptdev->gpu_info.mmu_features); |
| 2272 | u64 full_va_range = 1ull << va_bits; |
| 2273 | struct drm_gem_object *dummy_gem; |
| 2274 | struct drm_gpu_scheduler *sched; |
| 2275 | const struct drm_sched_init_args sched_args = { |
| 2276 | .ops = &panthor_vm_bind_ops, |
| 2277 | .submit_wq = ptdev->mmu->vm.wq, |
| 2278 | .num_rqs = 1, |
| 2279 | .credit_limit = 1, |
| 2280 | /* Bind operations are synchronous for now, no timeout needed. */ |
| 2281 | .timeout = MAX_SCHEDULE_TIMEOUT, |
| 2282 | .name = "panthor-vm-bind",
| 2283 | .dev = ptdev->base.dev, |
| 2284 | }; |
| 2285 | struct io_pgtable_cfg pgtbl_cfg; |
| 2286 | u64 mair, min_va, va_range; |
| 2287 | struct panthor_vm *vm; |
| 2288 | int ret; |
| 2289 | |
| 2290 | vm = kzalloc(sizeof(*vm), GFP_KERNEL); |
| 2291 | if (!vm) |
| 2292 | return ERR_PTR(-ENOMEM);
| 2293 | 
| 2294 | /* We allocate a dummy GEM for the VM. */
| 2295 | dummy_gem = drm_gpuvm_resv_object_alloc(&ptdev->base);
| 2296 | if (!dummy_gem) { |
| 2297 | ret = -ENOMEM; |
| 2298 | goto err_free_vm; |
| 2299 | } |
| 2300 | |
| 2301 | mutex_init(&vm->heaps.lock); |
| 2302 | vm->for_mcu = for_mcu; |
| 2303 | vm->ptdev = ptdev; |
| 2304 | mutex_init(&vm->op_lock); |
| 2305 | |
| 2306 | if (for_mcu) { |
| 2307 | /* CSF MCU is a Cortex-M7, and can only address 4G */
| 2308 | min_va = 0; |
| 2309 | va_range = SZ_4G; |
| 2310 | } else { |
| 2311 | min_va = 0; |
| 2312 | va_range = full_va_range; |
| 2313 | } |
| 2314 | |
| 2315 | mutex_init(&vm->mm_lock); |
| 2316 | drm_mm_init(&vm->mm, kernel_va_start, kernel_va_size);
| 2317 | vm->kernel_auto_va.start = auto_kernel_va_start;
| 2318 | vm->kernel_auto_va.end = vm->kernel_auto_va.start + auto_kernel_va_size - 1;
| 2319 | 
| 2320 | INIT_LIST_HEAD(&vm->node);
| 2321 | INIT_LIST_HEAD(&vm->as.lru_node);
| 2322 | vm->as.id = -1;
| 2323 | refcount_set(&vm->as.active_cnt, 0);
| 2324 | |
| 2325 | pgtbl_cfg = (struct io_pgtable_cfg) { |
| 2326 | .pgsize_bitmap = SZ_4K | SZ_2M, |
| 2327 | .ias = va_bits, |
| 2328 | .oas = pa_bits, |
| 2329 | .coherent_walk = ptdev->coherent, |
| 2330 | .tlb = &mmu_tlb_ops, |
| 2331 | .iommu_dev = ptdev->base.dev, |
| 2332 | .alloc = alloc_pt, |
| 2333 | .free = free_pt, |
| 2334 | }; |
| 2335 | |
| 2336 | vm->pgtbl_ops = alloc_io_pgtable_ops(ARM_64_LPAE_S1, &pgtbl_cfg, vm);
| 2337 | if (!vm->pgtbl_ops) {
| 2338 | ret = -EINVAL;
| 2339 | goto err_mm_takedown;
| 2340 | }
| 2341 | 
| 2342 | ret = drm_sched_init(&vm->sched, &sched_args);
| 2343 | if (ret)
| 2344 | goto err_free_io_pgtable;
| 2345 | 
| 2346 | sched = &vm->sched;
| 2347 | ret = drm_sched_entity_init(&vm->entity, 0, &sched, 1, NULL);
| 2348 | if (ret) |
| 2349 | goto err_sched_fini; |
| 2350 | |
| 2351 | mair = io_pgtable_ops_to_pgtable(vm->pgtbl_ops)->cfg.arm_lpae_s1_cfg.mair; |
| 2352 | vm->memattr = mair_to_memattr(mair, ptdev->coherent);
| 2353 | 
| 2354 | mutex_lock(&ptdev->mmu->vm.lock);
| 2355 | list_add_tail(&vm->node, &ptdev->mmu->vm.list);
| 2356 | 
| 2357 | /* If a reset is in progress, stop the scheduler. */
| 2358 | if (ptdev->mmu->vm.reset_in_progress)
| 2359 | panthor_vm_stop(vm);
| 2360 | mutex_unlock(&ptdev->mmu->vm.lock);
| 2361 | |
| 2362 | /* We intentionally leave the reserved range at zero, because we want kernel VMAs
| 2363 | * to be handled the same way user VMAs are.
| 2364 | */
| 2365 | drm_gpuvm_init(&vm->base, for_mcu ? "panthor-MCU-VM" : "panthor-GPU-VM",
| 2366 | DRM_GPUVM_RESV_PROTECTED | DRM_GPUVM_IMMEDIATE_MODE,
| 2367 | &ptdev->base, dummy_gem, min_va, va_range, 0, 0,
| 2368 | &panthor_gpuvm_ops);
| 2369 | drm_gem_object_put(dummy_gem);
| 2370 | return vm; |
| 2371 | |
| 2372 | err_sched_fini: |
| 2373 | drm_sched_fini(&vm->sched);
| 2374 | 
| 2375 | err_free_io_pgtable:
| 2376 | free_io_pgtable_ops(vm->pgtbl_ops);
| 2377 | 
| 2378 | err_mm_takedown:
| 2379 | drm_mm_takedown(&vm->mm);
| 2380 | drm_gem_object_put(dummy_gem);
| 2381 | 
| 2382 | err_free_vm:
| 2383 | kfree(vm);
| 2384 | return ERR_PTR(ret);
| 2385 | } |
| 2386 | |
| 2387 | static int |
| 2388 | panthor_vm_bind_prepare_op_ctx(struct drm_file *file, |
| 2389 | struct panthor_vm *vm, |
| 2390 | const struct drm_panthor_vm_bind_op *op, |
| 2391 | struct panthor_vm_op_ctx *op_ctx) |
| 2392 | { |
| 2393 | ssize_t vm_pgsz = panthor_vm_page_size(vm); |
| 2394 | struct drm_gem_object *gem; |
| 2395 | int ret; |
| 2396 | |
| 2397 | /* Aligned on page size. */ |
| 2398 | if (!IS_ALIGNED(op->va | op->size | op->bo_offset, vm_pgsz)) |
| 2399 | return -EINVAL; |
| 2400 | |
| 2401 | switch (op->flags & DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) { |
| 2402 | case DRM_PANTHOR_VM_BIND_OP_TYPE_MAP: |
| 2403 | gem = drm_gem_object_lookup(file, op->bo_handle);
| 2404 | ret = panthor_vm_prepare_map_op_ctx(op_ctx, vm,
| 2405 | gem ? to_panthor_bo(gem) : NULL,
| 2406 | op->bo_offset,
| 2407 | op->size,
| 2408 | op->va,
| 2409 | op->flags);
| 2410 | drm_gem_object_put(gem);
| 2411 | return ret; |
| 2412 | |
| 2413 | case DRM_PANTHOR_VM_BIND_OP_TYPE_UNMAP: |
| 2414 | if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) |
| 2415 | return -EINVAL; |
| 2416 | |
| 2417 | if (op->bo_handle || op->bo_offset) |
| 2418 | return -EINVAL; |
| 2419 | |
| 2420 | return panthor_vm_prepare_unmap_op_ctx(op_ctx, vm, op->va, op->size);
| 2421 | |
| 2422 | case DRM_PANTHOR_VM_BIND_OP_TYPE_SYNC_ONLY: |
| 2423 | if (op->flags & ~DRM_PANTHOR_VM_BIND_OP_TYPE_MASK) |
| 2424 | return -EINVAL; |
| 2425 | |
| 2426 | if (op->bo_handle || op->bo_offset) |
| 2427 | return -EINVAL; |
| 2428 | |
| 2429 | if (op->va || op->size) |
| 2430 | return -EINVAL; |
| 2431 | |
| 2432 | if (!op->syncs.count) |
| 2433 | return -EINVAL; |
| 2434 | |
| 2435 | panthor_vm_prepare_sync_only_op_ctx(op_ctx, vm); |
| 2436 | return 0; |
| 2437 | |
| 2438 | default: |
| 2439 | return -EINVAL; |
| 2440 | } |
| 2441 | } |
| 2442 | |
| 2443 | static void panthor_vm_bind_job_cleanup_op_ctx_work(struct work_struct *work) |
| 2444 | { |
| 2445 | struct panthor_vm_bind_job *job = |
| 2446 | container_of(work, struct panthor_vm_bind_job, cleanup_op_ctx_work); |
| 2447 | |
| 2448 | panthor_vm_bind_job_put(&job->base);
| 2449 | } |
| 2450 | |
| 2451 | /** |
| 2452 | * panthor_vm_bind_job_create() - Create a VM_BIND job |
| 2453 | * @file: File. |
| 2454 | * @vm: VM targeted by the VM_BIND job. |
| 2455 | * @op: VM operation data. |
| 2456 | * |
| 2457 | * Return: A valid pointer on success, an ERR_PTR() otherwise. |
| 2458 | */ |
| 2459 | struct drm_sched_job * |
| 2460 | panthor_vm_bind_job_create(struct drm_file *file, |
| 2461 | struct panthor_vm *vm, |
| 2462 | const struct drm_panthor_vm_bind_op *op) |
| 2463 | { |
| 2464 | struct panthor_vm_bind_job *job; |
| 2465 | int ret; |
| 2466 | |
| 2467 | if (!vm) |
| 2468 | return ERR_PTR(-EINVAL);
| 2469 | 
| 2470 | if (vm->destroyed || vm->unusable)
| 2471 | return ERR_PTR(-EINVAL);
| 2472 | 
| 2473 | job = kzalloc(sizeof(*job), GFP_KERNEL);
| 2474 | if (!job)
| 2475 | return ERR_PTR(-ENOMEM);
| 2476 | 
| 2477 | ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &job->ctx);
| 2478 | if (ret) {
| 2479 | kfree(job);
| 2480 | return ERR_PTR(ret);
| 2481 | } |
| 2482 | |
| 2483 | INIT_WORK(&job->cleanup_op_ctx_work, panthor_vm_bind_job_cleanup_op_ctx_work); |
| 2484 | kref_init(&job->refcount);
| 2485 | job->vm = panthor_vm_get(vm);
| 2486 | 
| 2487 | ret = drm_sched_job_init(&job->base, &vm->entity, 1, vm, file->client_id);
| 2488 | if (ret) |
| 2489 | goto err_put_job; |
| 2490 | |
| 2491 | return &job->base; |
| 2492 | |
| 2493 | err_put_job: |
| 2494 | panthor_vm_bind_job_put(&job->base);
| 2495 | return ERR_PTR(ret);
| 2496 | } |
| 2497 | |
| 2498 | /** |
| 2499 | * panthor_vm_bind_job_prepare_resvs() - Prepare VM_BIND job dma_resvs |
| 2500 | * @exec: The locking/preparation context. |
| 2501 | * @sched_job: The job to prepare resvs on. |
| 2502 | * |
| 2503 | * Locks and prepare the VM resv. |
| 2504 | * |
| 2505 | * If this is a map operation, locks and prepares the GEM resv. |
| 2506 | * |
| 2507 | * Return: 0 on success, a negative error code otherwise. |
| 2508 | */ |
| 2509 | int panthor_vm_bind_job_prepare_resvs(struct drm_exec *exec, |
| 2510 | struct drm_sched_job *sched_job) |
| 2511 | { |
| 2512 | struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); |
| 2513 | int ret; |
| 2514 | |
| 2515 | /* Acquire the VM lock and reserve a slot for this VM bind job. */
| 2516 | ret = drm_gpuvm_prepare_vm(&job->vm->base, exec, 1);
| 2517 | if (ret)
| 2518 | return ret;
| 2519 | 
| 2520 | if (job->ctx.map.vm_bo) {
| 2521 | /* Lock/prepare the GEM being mapped. */
| 2522 | ret = drm_exec_prepare_obj(exec, job->ctx.map.vm_bo->obj, 1);
| 2523 | if (ret) |
| 2524 | return ret; |
| 2525 | } |
| 2526 | |
| 2527 | return 0; |
| 2528 | } |
| 2529 | |
| 2530 | /** |
| 2531 | * panthor_vm_bind_job_update_resvs() - Update the resv objects touched by a job |
| 2532 | * @exec: drm_exec context. |
| 2533 | * @sched_job: Job to update the resvs on. |
| 2534 | */ |
| 2535 | void panthor_vm_bind_job_update_resvs(struct drm_exec *exec, |
| 2536 | struct drm_sched_job *sched_job) |
| 2537 | { |
| 2538 | struct panthor_vm_bind_job *job = container_of(sched_job, struct panthor_vm_bind_job, base); |
| 2539 | |
| 2540 | /* Explicit sync => we just register our job finished fence as bookkeep. */ |
| 2541 | drm_gpuvm_resv_add_fence(&job->vm->base, exec,
| 2542 | &sched_job->s_fence->finished,
| 2543 | DMA_RESV_USAGE_BOOKKEEP,
| 2544 | DMA_RESV_USAGE_BOOKKEEP);
| 2545 | } |
| 2546 | |
| 2547 | void panthor_vm_update_resvs(struct panthor_vm *vm, struct drm_exec *exec, |
| 2548 | struct dma_fence *fence, |
| 2549 | enum dma_resv_usage private_usage, |
| 2550 | enum dma_resv_usage extobj_usage) |
| 2551 | { |
| 2552 | drm_gpuvm_resv_add_fence(&vm->base, exec, fence, private_usage, extobj_usage);
| 2553 | } |
| 2554 | |
| 2555 | /** |
| 2556 | * panthor_vm_bind_exec_sync_op() - Execute a VM_BIND operation synchronously. |
| 2557 | * @file: File. |
| 2558 | * @vm: VM targeted by the VM operation. |
| 2559 | * @op: Data describing the VM operation. |
| 2560 | * |
| 2561 | * Return: 0 on success, a negative error code otherwise. |
| 2562 | */ |
| 2563 | int panthor_vm_bind_exec_sync_op(struct drm_file *file, |
| 2564 | struct panthor_vm *vm, |
| 2565 | struct drm_panthor_vm_bind_op *op) |
| 2566 | { |
| 2567 | struct panthor_vm_op_ctx op_ctx; |
| 2568 | int ret; |
| 2569 | |
| 2570 | /* No sync objects allowed on synchronous operations. */ |
| 2571 | if (op->syncs.count) |
| 2572 | return -EINVAL; |
| 2573 | |
| 2574 | if (!op->size) |
| 2575 | return 0; |
| 2576 | |
| 2577 | ret = panthor_vm_bind_prepare_op_ctx(file, vm, op, &op_ctx);
| 2578 | if (ret)
| 2579 | return ret;
| 2580 | 
| 2581 | ret = panthor_vm_exec_op(vm, &op_ctx, false);
| 2582 | panthor_vm_cleanup_op_ctx(&op_ctx, vm);
| 2583 | |
| 2584 | return ret; |
| 2585 | } |
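|      | 
|      | /* Illustrative call pattern (the surrounding ioctl handler and its argument
|      | * names are assumptions, not taken from this file): the synchronous VM_BIND
|      | * path simply walks the user-provided op array:
|      | *
|      | *	for (i = 0; i < args->ops.count; i++) {
|      | *		ret = panthor_vm_bind_exec_sync_op(file, vm, &ops[i]);
|      | *		if (ret)
|      | *			break;
|      | *	}
|      | */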
| 2586 | |
| 2587 | /** |
| 2588 | * panthor_vm_map_bo_range() - Map a GEM object range to a VM |
| 2589 | * @vm: VM to map the GEM to. |
| 2590 | * @bo: GEM object to map. |
| 2591 | * @offset: Offset in the GEM object. |
| 2592 | * @size: Size to map. |
| 2593 | * @va: Virtual address to map the object to. |
| 2594 | * @flags: Combination of drm_panthor_vm_bind_op_flags flags. |
| 2595 | * Only map-related flags are valid. |
| 2596 | * |
| 2597 | * Internal use only. For userspace requests, use |
| 2598 | * panthor_vm_bind_exec_sync_op() instead. |
| 2599 | * |
| 2600 | * Return: 0 on success, a negative error code otherwise. |
| 2601 | */ |
| 2602 | int panthor_vm_map_bo_range(struct panthor_vm *vm, struct panthor_gem_object *bo, |
| 2603 | u64 offset, u64 size, u64 va, u32 flags) |
| 2604 | { |
| 2605 | struct panthor_vm_op_ctx op_ctx; |
| 2606 | int ret; |
| 2607 | |
| 2608 | ret = panthor_vm_prepare_map_op_ctx(&op_ctx, vm, bo, offset, size, va, flags);
| 2609 | if (ret)
| 2610 | return ret;
| 2611 | 
| 2612 | ret = panthor_vm_exec_op(vm, &op_ctx, false);
| 2613 | panthor_vm_cleanup_op_ctx(&op_ctx, vm);
| 2614 | |
| 2615 | return ret; |
| 2616 | } |
| 2617 | |
| 2618 | /** |
| 2619 | * panthor_vm_unmap_range() - Unmap a portion of the VA space |
| 2620 | * @vm: VM to unmap the region from. |
| 2621 | * @va: Virtual address to unmap. Must be 4k aligned. |
| 2622 | * @size: Size of the region to unmap. Must be 4k aligned. |
| 2623 | * |
| 2624 | * Internal use only. For userspace requests, use |
| 2625 | * panthor_vm_bind_exec_sync_op() instead. |
| 2626 | * |
| 2627 | * Return: 0 on success, a negative error code otherwise. |
| 2628 | */ |
| 2629 | int panthor_vm_unmap_range(struct panthor_vm *vm, u64 va, u64 size) |
| 2630 | { |
| 2631 | struct panthor_vm_op_ctx op_ctx; |
| 2632 | int ret; |
| 2633 | |
| 2634 | ret = panthor_vm_prepare_unmap_op_ctx(&op_ctx, vm, va, size);
| 2635 | if (ret)
| 2636 | return ret;
| 2637 | 
| 2638 | ret = panthor_vm_exec_op(vm, &op_ctx, false);
| 2639 | panthor_vm_cleanup_op_ctx(&op_ctx, vm);
| 2640 | |
| 2641 | return ret; |
| 2642 | } |
| 2643 | |
| 2644 | /** |
| 2645 | * panthor_vm_prepare_mapped_bos_resvs() - Prepare resvs on VM BOs. |
| 2646 | * @exec: Locking/preparation context. |
| 2647 | * @vm: VM targeted by the GPU job. |
| 2648 | * @slot_count: Number of slots to reserve. |
| 2649 | * |
| 2650 | * GPU jobs assume all BOs bound to the VM at the time the job is submitted |
| 2651 | * are available when the job is executed. In order to guarantee that, we |
| 2652 | * need to reserve a slot on all BOs mapped to a VM and update this slot with |
| 2653 | * the job fence after its submission. |
| 2654 | * |
| 2655 | * Return: 0 on success, a negative error code otherwise. |
| 2656 | */ |
| 2657 | int panthor_vm_prepare_mapped_bos_resvs(struct drm_exec *exec, struct panthor_vm *vm, |
| 2658 | u32 slot_count) |
| 2659 | { |
| 2660 | int ret; |
| 2661 | |
| 2662 | /* Acquire the VM lock and reserve a slot for this GPU job. */ |
| 2663 | ret = drm_gpuvm_prepare_vm(&vm->base, exec, slot_count);
| 2664 | if (ret)
| 2665 | return ret;
| 2666 | 
| 2667 | return drm_gpuvm_prepare_objects(&vm->base, exec, slot_count);
| 2668 | } |
| 2669 | |
| 2670 | /** |
| 2671 | * panthor_mmu_unplug() - Unplug the MMU logic |
| 2672 | * @ptdev: Device. |
| 2673 | * |
| 2674 | * No access to the MMU regs should be done after this function is called. |
| 2675 | * We suspend the IRQ and disable all VMs to guarantee that. |
| 2676 | */ |
| 2677 | void panthor_mmu_unplug(struct panthor_device *ptdev) |
| 2678 | { |
| 2679 | if (!IS_ENABLED(CONFIG_PM) || pm_runtime_active(ptdev->base.dev))
| 2680 | panthor_mmu_irq_suspend(&ptdev->mmu->irq);
| 2681 | 
| 2682 | mutex_lock(&ptdev->mmu->as.slots_lock);
| 2683 | for (u32 i = 0; i < ARRAY_SIZE(ptdev->mmu->as.slots); i++) {
| 2684 | struct panthor_vm *vm = ptdev->mmu->as.slots[i].vm;
| 2685 | 
| 2686 | if (vm) {
| 2687 | drm_WARN_ON(&ptdev->base, panthor_mmu_as_disable(ptdev, i));
| 2688 | panthor_vm_release_as_locked(vm);
| 2689 | }
| 2690 | }
| 2691 | mutex_unlock(&ptdev->mmu->as.slots_lock);
| 2692 | } |
| 2693 | |
| 2694 | static void panthor_mmu_release_wq(struct drm_device *ddev, void *res) |
| 2695 | { |
| 2696 | destroy_workqueue(res);
| 2697 | } |
| 2698 | |
| 2699 | /** |
| 2700 | * panthor_mmu_init() - Initialize the MMU logic. |
| 2701 | * @ptdev: Device. |
| 2702 | * |
| 2703 | * Return: 0 on success, a negative error code otherwise. |
| 2704 | */ |
| 2705 | int panthor_mmu_init(struct panthor_device *ptdev) |
| 2706 | { |
| 2707 | u32 va_bits = GPU_MMU_FEATURES_VA_BITS(ptdev->gpu_info.mmu_features); |
| 2708 | struct panthor_mmu *mmu; |
| 2709 | int ret, irq; |
| 2710 | |
| 2711 | mmu = drmm_kzalloc(&ptdev->base, sizeof(*mmu), GFP_KERNEL);
| 2712 | if (!mmu) |
| 2713 | return -ENOMEM; |
| 2714 | |
| 2715 | INIT_LIST_HEAD(&mmu->as.lru_list);
| 2716 | 
| 2717 | ret = drmm_mutex_init(&ptdev->base, &mmu->as.slots_lock);
| 2718 | if (ret)
| 2719 | return ret;
| 2720 | 
| 2721 | INIT_LIST_HEAD(&mmu->vm.list);
| 2722 | ret = drmm_mutex_init(&ptdev->base, &mmu->vm.lock); |
| 2723 | if (ret) |
| 2724 | return ret; |
| 2725 | |
| 2726 | ptdev->mmu = mmu; |
| 2727 | |
| 2728 | irq = platform_get_irq_byname(to_platform_device(ptdev->base.dev), "mmu");
| 2729 | if (irq <= 0)
| 2730 | return -ENODEV;
| 2731 | 
| 2732 | ret = panthor_request_mmu_irq(ptdev, &mmu->irq, irq,
| 2733 | panthor_mmu_fault_mask(ptdev, ~0));
| 2734 | if (ret)
| 2735 | return ret;
| 2736 | 
| 2737 | mmu->vm.wq = alloc_workqueue("panthor-vm-bind", WQ_UNBOUND, 0);
| 2738 | if (!mmu->vm.wq) |
| 2739 | return -ENOMEM; |
| 2740 | |
| 2741 | /* On 32-bit kernels, the VA space is limited by the io_pgtable_ops abstraction, |
| 2742 | * which passes iova as an unsigned long. Patch the mmu_features to reflect this |
| 2743 | * limitation. |
| 2744 | */ |
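|      | /* For example, with a 48-bit GPU VA space on a 32-bit kernel, the VA-bits
|      | * field (bits 7:0 of mmu_features) gets clamped to 32 here.
|      | */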
| 2745 | if (va_bits > BITS_PER_LONG) { |
| 2746 | ptdev->gpu_info.mmu_features &= ~GENMASK(7, 0); |
| 2747 | ptdev->gpu_info.mmu_features |= BITS_PER_LONG; |
| 2748 | } |
| 2749 | |
| 2750 | return drmm_add_action_or_reset(&ptdev->base, panthor_mmu_release_wq, mmu->vm.wq); |
| 2751 | } |
| 2752 | |
| 2753 | #ifdef CONFIG_DEBUG_FS |
| 2754 | static int show_vm_gpuvas(struct panthor_vm *vm, struct seq_file *m) |
| 2755 | { |
| 2756 | int ret; |
| 2757 | |
| 2758 | mutex_lock(&vm->op_lock); |
| 2759 | ret = drm_debugfs_gpuva_info(m, &vm->base);
| 2760 | mutex_unlock(&vm->op_lock);
| 2761 | |
| 2762 | return ret; |
| 2763 | } |
| 2764 | |
| 2765 | static int show_each_vm(struct seq_file *m, void *arg) |
| 2766 | { |
| 2767 | struct drm_info_node *node = (struct drm_info_node *)m->private; |
| 2768 | struct drm_device *ddev = node->minor->dev; |
| 2769 | struct panthor_device *ptdev = container_of(ddev, struct panthor_device, base); |
| 2770 | int (*show)(struct panthor_vm *, struct seq_file *) = node->info_ent->data; |
| 2771 | struct panthor_vm *vm; |
| 2772 | int ret = 0; |
| 2773 | |
| 2774 | mutex_lock(&ptdev->mmu->vm.lock); |
| 2775 | list_for_each_entry(vm, &ptdev->mmu->vm.list, node) { |
| 2776 | ret = show(vm, m); |
| 2777 | if (ret < 0) |
| 2778 | break; |
| 2779 | |
| 2780 | seq_puts(m, "\n");
| 2781 | }
| 2782 | mutex_unlock(&ptdev->mmu->vm.lock);
| 2783 | |
| 2784 | return ret; |
| 2785 | } |
| 2786 | |
| 2787 | static struct drm_info_list panthor_mmu_debugfs_list[] = { |
| 2788 | DRM_DEBUGFS_GPUVA_INFO(show_each_vm, show_vm_gpuvas), |
| 2789 | }; |
| 2790 | |
| 2791 | /** |
| 2792 | * panthor_mmu_debugfs_init() - Initialize MMU debugfs entries |
| 2793 | * @minor: Minor. |
| 2794 | */ |
| 2795 | void panthor_mmu_debugfs_init(struct drm_minor *minor) |
| 2796 | { |
| 2797 | drm_debugfs_create_files(panthor_mmu_debugfs_list,
| 2798 | ARRAY_SIZE(panthor_mmu_debugfs_list),
| 2799 | minor->debugfs_root, minor);
| 2800 | } |
| 2801 | #endif /* CONFIG_DEBUG_FS */ |
| 2802 | |
| 2803 | /** |
| 2804 | * panthor_mmu_pt_cache_init() - Initialize the page table cache. |
| 2805 | * |
| 2806 | * Return: 0 on success, a negative error code otherwise. |
| 2807 | */ |
| 2808 | int panthor_mmu_pt_cache_init(void) |
| 2809 | { |
| 2810 | pt_cache = kmem_cache_create("panthor-mmu-pt", SZ_4K, SZ_4K, 0, NULL);
| 2811 | if (!pt_cache) |
| 2812 | return -ENOMEM; |
| 2813 | |
| 2814 | return 0; |
| 2815 | } |
| 2816 | |
| 2817 | /** |
| 2818 | * panthor_mmu_pt_cache_fini() - Destroy the page table cache. |
| 2819 | */ |
| 2820 | void panthor_mmu_pt_cache_fini(void) |
| 2821 | { |
| 2822 | kmem_cache_destroy(pt_cache);
| 2823 | } |
| 2824 | |