| 1 | // SPDX-License-Identifier: MIT |
| 2 | /* |
| 3 | * Copyright © 2021 Intel Corporation |
| 4 | */ |
| 5 | |
| 6 | #include "xe_vm.h" |
| 7 | |
| 8 | #include <linux/dma-fence-array.h> |
| 9 | #include <linux/nospec.h> |
| 10 | |
| 11 | #include <drm/drm_drv.h> |
| 12 | #include <drm/drm_exec.h> |
| 13 | #include <drm/drm_print.h> |
| 14 | #include <drm/ttm/ttm_tt.h> |
| 15 | #include <uapi/drm/xe_drm.h> |
| 16 | #include <linux/ascii85.h> |
| 17 | #include <linux/delay.h> |
| 18 | #include <linux/kthread.h> |
| 19 | #include <linux/mm.h> |
| 20 | #include <linux/swap.h> |
| 21 | |
| 22 | #include <generated/xe_wa_oob.h> |
| 23 | |
| 24 | #include "regs/xe_gtt_defs.h" |
| 25 | #include "xe_assert.h" |
| 26 | #include "xe_bo.h" |
| 27 | #include "xe_device.h" |
| 28 | #include "xe_drm_client.h" |
| 29 | #include "xe_exec_queue.h" |
| 30 | #include "xe_migrate.h" |
| 31 | #include "xe_pat.h" |
| 32 | #include "xe_pm.h" |
| 33 | #include "xe_preempt_fence.h" |
| 34 | #include "xe_pt.h" |
| 35 | #include "xe_pxp.h" |
| 36 | #include "xe_res_cursor.h" |
| 37 | #include "xe_sriov_vf.h" |
| 38 | #include "xe_svm.h" |
| 39 | #include "xe_sync.h" |
| 40 | #include "xe_tile.h" |
| 41 | #include "xe_tlb_inval.h" |
| 42 | #include "xe_trace_bo.h" |
| 43 | #include "xe_wa.h" |
| 44 | |
| 45 | static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm) |
| 46 | { |
| 47 | return vm->gpuvm.r_obj; |
| 48 | } |
| 49 | |
| 50 | /** |
| 51 | * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction |
| 52 | * @vm: The vm whose resv is to be locked. |
| 53 | * @exec: The drm_exec transaction. |
| 54 | * |
| 55 | * Helper to lock the vm's resv as part of a drm_exec transaction. |
| 56 | * |
| 57 | * Return: %0 on success. See drm_exec_lock_obj() for error codes. |
| 58 | */ |
| 59 | int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec) |
| 60 | { |
| 61 | return drm_exec_lock_obj(exec, obj: xe_vm_obj(vm)); |
| 62 | } |
| 63 | |
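/*
 * Return true if any exec queue on the VM either has no preempt fence
 * installed or has one with software signaling already enabled, which
 * presumably means a preemption or userptr invalidation is in flight.
 */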
| 64 | static bool preempt_fences_waiting(struct xe_vm *vm) |
| 65 | { |
| 66 | struct xe_exec_queue *q; |
| 67 | |
| 68 | lockdep_assert_held(&vm->lock); |
| 69 | xe_vm_assert_held(vm); |
| 70 | |
| 71 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { |
| 72 | if (!q->lr.pfence || |
| 73 | test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, |
| 74 | &q->lr.pfence->flags)) { |
| 75 | return true; |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | return false; |
| 80 | } |
| 81 | |
| 82 | static void free_preempt_fences(struct list_head *list) |
| 83 | { |
| 84 | struct list_head *link, *next; |
| 85 | |
| 86 | list_for_each_safe(link, next, list) |
| 87 | xe_preempt_fence_free(pfence: to_preempt_fence_from_link(link)); |
| 88 | } |
| 89 | |
| 90 | static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list, |
| 91 | unsigned int *count) |
| 92 | { |
| 93 | lockdep_assert_held(&vm->lock); |
| 94 | xe_vm_assert_held(vm); |
| 95 | |
| 96 | if (*count >= vm->preempt.num_exec_queues) |
| 97 | return 0; |
| 98 | |
| 99 | for (; *count < vm->preempt.num_exec_queues; ++(*count)) { |
| 100 | struct xe_preempt_fence *pfence = xe_preempt_fence_alloc(); |
| 101 | |
| 102 | if (IS_ERR(ptr: pfence)) |
| 103 | return PTR_ERR(ptr: pfence); |
| 104 | |
| 105 | list_move_tail(list: xe_preempt_fence_link(pfence), head: list); |
| 106 | } |
| 107 | |
| 108 | return 0; |
| 109 | } |
| 110 | |
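/*
 * Wait for all currently installed preempt fences to signal and drop them.
 * On a VF with migration support the wait is bounded (HZ / 5) so the caller
 * can back off with -EAGAIN rather than blocking the post-migration recovery
 * workers; otherwise the wait is unbounded. A fence error of -ETIME means
 * preemption timed out and the VM needs to be killed.
 */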
| 111 | static int wait_for_existing_preempt_fences(struct xe_vm *vm) |
| 112 | { |
| 113 | struct xe_exec_queue *q; |
| 114 | bool vf_migration = IS_SRIOV_VF(vm->xe) && |
| 115 | xe_sriov_vf_migration_supported(xe: vm->xe); |
| 116 | signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT; |
| 117 | |
| 118 | xe_vm_assert_held(vm); |
| 119 | |
| 120 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { |
| 121 | if (q->lr.pfence) { |
| 122 | long timeout; |
| 123 | |
| 124 | timeout = dma_fence_wait_timeout(q->lr.pfence, intr: false, |
| 125 | timeout: wait_time); |
| 126 | if (!timeout) { |
| 127 | xe_assert(vm->xe, vf_migration); |
| 128 | return -EAGAIN; |
| 129 | } |
| 130 | |
| 131 | /* Only -ETIME on fence indicates VM needs to be killed */ |
| 132 | if (timeout < 0 || q->lr.pfence->error == -ETIME) |
| 133 | return -ETIME; |
| 134 | |
| 135 | dma_fence_put(fence: q->lr.pfence); |
| 136 | q->lr.pfence = NULL; |
| 137 | } |
| 138 | } |
| 139 | |
| 140 | return 0; |
| 141 | } |
| 142 | |
| 143 | static bool xe_vm_is_idle(struct xe_vm *vm) |
| 144 | { |
| 145 | struct xe_exec_queue *q; |
| 146 | |
| 147 | xe_vm_assert_held(vm); |
| 148 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { |
| 149 | if (!xe_exec_queue_is_idle(q)) |
| 150 | return false; |
| 151 | } |
| 152 | |
| 153 | return true; |
| 154 | } |
| 155 | |
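/*
 * Pair each exec queue with a pre-allocated fence from @list and arm it with
 * the queue's preempt-fence context and next seqno, replacing the queue's
 * previous preempt fence, if any.
 */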
| 156 | static void arm_preempt_fences(struct xe_vm *vm, struct list_head *list) |
| 157 | { |
| 158 | struct list_head *link; |
| 159 | struct xe_exec_queue *q; |
| 160 | |
| 161 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { |
| 162 | struct dma_fence *fence; |
| 163 | |
| 164 | link = list->next; |
| 165 | xe_assert(vm->xe, link != list); |
| 166 | |
| 167 | fence = xe_preempt_fence_arm(pfence: to_preempt_fence_from_link(link), |
| 168 | q, context: q->lr.context, |
| 169 | seqno: ++q->lr.seqno); |
| 170 | dma_fence_put(fence: q->lr.pfence); |
| 171 | q->lr.pfence = fence; |
| 172 | } |
| 173 | } |
| 174 | |
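/*
 * Reserve fence slots and add the VM's current preempt fences to @bo's
 * dma-resv with BOOKKEEP usage, presumably so a newly bound BO participates
 * in the same preemption scheme as the rest of the VM's objects.
 */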
| 175 | static int add_preempt_fences(struct xe_vm *vm, struct xe_bo *bo) |
| 176 | { |
| 177 | struct xe_exec_queue *q; |
| 178 | int err; |
| 179 | |
| 180 | xe_bo_assert_held(bo); |
| 181 | |
| 182 | if (!vm->preempt.num_exec_queues) |
| 183 | return 0; |
| 184 | |
| 185 | err = dma_resv_reserve_fences(obj: bo->ttm.base.resv, num_fences: vm->preempt.num_exec_queues); |
| 186 | if (err) |
| 187 | return err; |
| 188 | |
| 189 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) |
| 190 | if (q->lr.pfence) { |
| 191 | dma_resv_add_fence(obj: bo->ttm.base.resv, |
| 192 | fence: q->lr.pfence, |
| 193 | usage: DMA_RESV_USAGE_BOOKKEEP); |
| 194 | } |
| 195 | |
| 196 | return 0; |
| 197 | } |
| 198 | |
| 199 | static void resume_and_reinstall_preempt_fences(struct xe_vm *vm, |
| 200 | struct drm_exec *exec) |
| 201 | { |
| 202 | struct xe_exec_queue *q; |
| 203 | |
| 204 | lockdep_assert_held(&vm->lock); |
| 205 | xe_vm_assert_held(vm); |
| 206 | |
| 207 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) { |
| 208 | q->ops->resume(q); |
| 209 | |
| 210 | drm_gpuvm_resv_add_fence(gpuvm: &vm->gpuvm, exec, fence: q->lr.pfence, |
| 211 | private_usage: DMA_RESV_USAGE_BOOKKEEP, extobj_usage: DMA_RESV_USAGE_BOOKKEEP); |
| 212 | } |
| 213 | } |
| 214 | |
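/**
 * xe_vm_add_compute_exec_queue() - Add a long-running exec queue to a VM
 * @vm: The VM, must be in preempt-fence mode.
 * @q: The exec queue to add.
 *
 * Creates a preempt fence for @q, adds the queue to the VM's
 * preempt.exec_queues list and installs the fence in the VM's dma-resv. If a
 * preemption or userptr invalidation is already in flight, software signaling
 * is enabled on the new fence so it syncs up with the other preempt fences.
 *
 * Return: 0 on success, negative error code on failure.
 */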
| 215 | int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) |
| 216 | { |
| 217 | struct drm_gpuvm_exec vm_exec = { |
| 218 | .vm = &vm->gpuvm, |
| 219 | .flags = DRM_EXEC_INTERRUPTIBLE_WAIT, |
| 220 | .num_fences = 1, |
| 221 | }; |
| 222 | struct drm_exec *exec = &vm_exec.exec; |
| 223 | struct xe_validation_ctx ctx; |
| 224 | struct dma_fence *pfence; |
| 225 | int err; |
| 226 | bool wait; |
| 227 | |
| 228 | xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); |
| 229 | |
| 230 | down_write(sem: &vm->lock); |
| 231 | err = xe_validation_exec_lock(ctx: &ctx, vm_exec: &vm_exec, val: &vm->xe->val); |
| 232 | if (err) |
| 233 | goto out_up_write; |
| 234 | |
| 235 | pfence = xe_preempt_fence_create(q, context: q->lr.context, |
| 236 | seqno: ++q->lr.seqno); |
| 237 | if (IS_ERR(ptr: pfence)) { |
| 238 | err = PTR_ERR(ptr: pfence); |
| 239 | goto out_fini; |
| 240 | } |
| 241 | |
| 242 | list_add(new: &q->lr.link, head: &vm->preempt.exec_queues); |
| 243 | ++vm->preempt.num_exec_queues; |
| 244 | q->lr.pfence = pfence; |
| 245 | |
| 246 | xe_svm_notifier_lock(vm); |
| 247 | |
| 248 | drm_gpuvm_resv_add_fence(gpuvm: &vm->gpuvm, exec, fence: pfence, |
| 249 | private_usage: DMA_RESV_USAGE_BOOKKEEP, extobj_usage: DMA_RESV_USAGE_BOOKKEEP); |
| 250 | |
| 251 | /* |
| 252 | * Check whether a preemption on the VM or a userptr invalidation is |
| 253 | * in flight; if so, trigger this preempt fence to sync state with the |
| 254 | * other preempt fences on the VM. |
| 255 | */ |
| 256 | wait = __xe_vm_userptr_needs_repin(vm) || preempt_fences_waiting(vm); |
| 257 | if (wait) |
| 258 | dma_fence_enable_sw_signaling(fence: pfence); |
| 259 | |
| 260 | xe_svm_notifier_unlock(vm); |
| 261 | |
| 262 | out_fini: |
| 263 | xe_validation_ctx_fini(ctx: &ctx); |
| 264 | out_up_write: |
| 265 | up_write(sem: &vm->lock); |
| 266 | |
| 267 | return err; |
| 268 | } |
| 269 | ALLOW_ERROR_INJECTION(xe_vm_add_compute_exec_queue, ERRNO); |
| 270 | |
| 271 | /** |
| 272 | * xe_vm_remove_compute_exec_queue() - Remove compute exec queue from VM |
| 273 | * @vm: The VM. |
| 274 | * @q: The exec_queue |
| 275 | * |
| 276 | * Note that this function might be called multiple times on the same queue. |
| 277 | */ |
| 278 | void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) |
| 279 | { |
| 280 | if (!xe_vm_in_preempt_fence_mode(vm)) |
| 281 | return; |
| 282 | |
| 283 | down_write(sem: &vm->lock); |
| 284 | if (!list_empty(head: &q->lr.link)) { |
| 285 | list_del_init(entry: &q->lr.link); |
| 286 | --vm->preempt.num_exec_queues; |
| 287 | } |
| 288 | if (q->lr.pfence) { |
| 289 | dma_fence_enable_sw_signaling(fence: q->lr.pfence); |
| 290 | dma_fence_put(fence: q->lr.pfence); |
| 291 | q->lr.pfence = NULL; |
| 292 | } |
| 293 | up_write(sem: &vm->lock); |
| 294 | } |
| 295 | |
| 296 | #define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000 |
| 297 | |
| 298 | /** |
| 299 | * xe_vm_kill() - VM Kill |
| 300 | * @vm: The VM. |
| 301 | * @unlocked: Flag indicating the VM's dma-resv is not held |
| 302 | * |
| 303 | * Kill the VM by setting the banned flag, indicating the VM is no longer available |
| 304 | * for use. If in preempt fence mode, also kill all exec queues attached to the VM. |
| 305 | */ |
| 306 | void xe_vm_kill(struct xe_vm *vm, bool unlocked) |
| 307 | { |
| 308 | struct xe_exec_queue *q; |
| 309 | |
| 310 | lockdep_assert_held(&vm->lock); |
| 311 | |
| 312 | if (unlocked) |
| 313 | xe_vm_lock(vm, intr: false); |
| 314 | |
| 315 | vm->flags |= XE_VM_FLAG_BANNED; |
| 316 | trace_xe_vm_kill(vm); |
| 317 | |
| 318 | list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) |
| 319 | q->ops->kill(q); |
| 320 | |
| 321 | if (unlocked) |
| 322 | xe_vm_unlock(vm); |
| 323 | |
| 324 | /* TODO: Inform user the VM is banned */ |
| 325 | } |
| 326 | |
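/*
 * drm_gpuvm vm_bo_validate() hook: move all VMAs of an evicted vm_bo onto the
 * VM's rebind list and (re)validate the backing BO. Returns -EAGAIN while a
 * PM transition is blocking rebinds (xe->pm_block) so the caller can back off
 * and retry later.
 */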
| 327 | static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec) |
| 328 | { |
| 329 | struct xe_vm *vm = gpuvm_to_vm(gpuvm: vm_bo->vm); |
| 330 | struct drm_gpuva *gpuva; |
| 331 | int ret; |
| 332 | |
| 333 | lockdep_assert_held(&vm->lock); |
| 334 | drm_gpuvm_bo_for_each_va(gpuva, vm_bo) |
| 335 | list_move_tail(list: &gpuva_to_vma(gpuva)->combined_links.rebind, |
| 336 | head: &vm->rebind_list); |
| 337 | |
| 338 | if (!try_wait_for_completion(x: &vm->xe->pm_block)) |
| 339 | return -EAGAIN; |
| 340 | |
| 341 | ret = xe_bo_validate(bo: gem_to_xe_bo(obj: vm_bo->obj), vm, allow_res_evict: false, exec); |
| 342 | if (ret) |
| 343 | return ret; |
| 344 | |
| 345 | vm_bo->evicted = false; |
| 346 | return 0; |
| 347 | } |
| 348 | |
| 349 | /** |
| 350 | * xe_vm_validate_rebind() - Validate buffer objects and rebind vmas |
| 351 | * @vm: The vm for which we are rebinding. |
| 352 | * @exec: The struct drm_exec with the locked GEM objects. |
| 353 | * @num_fences: The number of fences to reserve for the operation, not |
| 354 | * including rebinds and validations. |
| 355 | * |
| 356 | * Validates all evicted gem objects and rebinds their vmas. Note that |
| 357 | * rebindings may cause evictions and hence the validation-rebind |
| 358 | * sequence is rerun until there are no more objects to validate. |
| 359 | * |
| 360 | * Return: 0 on success, negative error code on error. In particular, |
| 361 | * may return -EINTR or -ERESTARTSYS if interrupted, and -EDEADLK if |
| 362 | * the drm_exec transaction needs to be restarted. |
| 363 | */ |
| 364 | int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec, |
| 365 | unsigned int num_fences) |
| 366 | { |
| 367 | struct drm_gem_object *obj; |
| 368 | unsigned long index; |
| 369 | int ret; |
| 370 | |
| 371 | do { |
| 372 | ret = drm_gpuvm_validate(gpuvm: &vm->gpuvm, exec); |
| 373 | if (ret) |
| 374 | return ret; |
| 375 | |
| 376 | ret = xe_vm_rebind(vm, rebind_worker: false); |
| 377 | if (ret) |
| 378 | return ret; |
| 379 | } while (!list_empty(head: &vm->gpuvm.evict.list)); |
| 380 | |
| 381 | drm_exec_for_each_locked_object(exec, index, obj) { |
| 382 | ret = dma_resv_reserve_fences(obj: obj->resv, num_fences); |
| 383 | if (ret) |
| 384 | return ret; |
| 385 | } |
| 386 | |
| 387 | return 0; |
| 388 | } |
| 389 | |
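/*
 * Locking/validation step of the rebind worker, run inside the
 * drm_exec_until_all_locked() loop: lock the VM resv and all external
 * objects, early out (*done = true) if the VM is idle or no preempt fence has
 * triggered, wait for the existing preempt fences to signal, then validate
 * and rebind everything, reserving one fence slot per exec queue for the new
 * preempt fences attached at the end of the worker.
 */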
| 390 | static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm, |
| 391 | bool *done) |
| 392 | { |
| 393 | int err; |
| 394 | |
| 395 | err = drm_gpuvm_prepare_vm(gpuvm: &vm->gpuvm, exec, num_fences: 0); |
| 396 | if (err) |
| 397 | return err; |
| 398 | |
| 399 | if (xe_vm_is_idle(vm)) { |
| 400 | vm->preempt.rebind_deactivated = true; |
| 401 | *done = true; |
| 402 | return 0; |
| 403 | } |
| 404 | |
| 405 | if (!preempt_fences_waiting(vm)) { |
| 406 | *done = true; |
| 407 | return 0; |
| 408 | } |
| 409 | |
| 410 | err = drm_gpuvm_prepare_objects(gpuvm: &vm->gpuvm, exec, num_fences: 0); |
| 411 | if (err) |
| 412 | return err; |
| 413 | |
| 414 | err = wait_for_existing_preempt_fences(vm); |
| 415 | if (err) |
| 416 | return err; |
| 417 | |
| 418 | /* |
| 419 | * Add validation and rebinding to the locking loop since both can |
| 420 | * cause evictions which may require blocking dma_resv locks. |
| 421 | * The fence reservation here is intended for the new preempt fences |
| 422 | * we attach at the end of the rebind work. |
| 423 | */ |
| 424 | return xe_vm_validate_rebind(vm, exec, num_fences: vm->preempt.num_exec_queues); |
| 425 | } |
| 426 | |
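/*
 * If a PM transition is in progress (xe->pm_block not completed), put the VM
 * on xe->rebind_resume_list so xe_vm_resume_rebind_worker() can requeue the
 * worker later, and return true to tell the caller to back off.
 */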
| 427 | static bool vm_suspend_rebind_worker(struct xe_vm *vm) |
| 428 | { |
| 429 | struct xe_device *xe = vm->xe; |
| 430 | bool ret = false; |
| 431 | |
| 432 | mutex_lock(&xe->rebind_resume_lock); |
| 433 | if (!try_wait_for_completion(x: &vm->xe->pm_block)) { |
| 434 | ret = true; |
| 435 | list_move_tail(list: &vm->preempt.pm_activate_link, head: &xe->rebind_resume_list); |
| 436 | } |
| 437 | mutex_unlock(lock: &xe->rebind_resume_lock); |
| 438 | |
| 439 | return ret; |
| 440 | } |
| 441 | |
| 442 | /** |
| 443 | * xe_vm_resume_rebind_worker() - Resume the rebind worker. |
| 444 | * @vm: The vm whose preempt worker to resume. |
| 445 | * |
| 446 | * Resume a preempt worker that was previously suspended by |
| 447 | * vm_suspend_rebind_worker(). |
| 448 | */ |
| 449 | void xe_vm_resume_rebind_worker(struct xe_vm *vm) |
| 450 | { |
| 451 | queue_work(wq: vm->xe->ordered_wq, work: &vm->preempt.rebind_work); |
| 452 | } |
| 453 | |
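/*
 * Main rebind worker for preempt-fence (long-running) VMs: repin userptrs,
 * lock and validate all objects, rebind evicted VMAs, wait for the kernel
 * fences, then arm and reinstall fresh preempt fences under the SVM notifier
 * lock. Retries on -EAGAIN; any other error bans (kills) the VM.
 */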
| 454 | static void preempt_rebind_work_func(struct work_struct *w) |
| 455 | { |
| 456 | struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work); |
| 457 | struct xe_validation_ctx ctx; |
| 458 | struct drm_exec exec; |
| 459 | unsigned int fence_count = 0; |
| 460 | LIST_HEAD(preempt_fences); |
| 461 | int err = 0; |
| 462 | long wait; |
| 463 | int __maybe_unused tries = 0; |
| 464 | |
| 465 | xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm)); |
| 466 | trace_xe_vm_rebind_worker_enter(vm); |
| 467 | |
| 468 | down_write(sem: &vm->lock); |
| 469 | |
| 470 | if (xe_vm_is_closed_or_banned(vm)) { |
| 471 | up_write(sem: &vm->lock); |
| 472 | trace_xe_vm_rebind_worker_exit(vm); |
| 473 | return; |
| 474 | } |
| 475 | |
| 476 | retry: |
| 477 | if (!try_wait_for_completion(x: &vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) { |
| 478 | up_write(sem: &vm->lock); |
| 479 | /* We don't actually block, but we don't make progress either. */ |
| 480 | xe_pm_might_block_on_suspend(); |
| 481 | return; |
| 482 | } |
| 483 | |
| 484 | if (xe_vm_userptr_check_repin(vm)) { |
| 485 | err = xe_vm_userptr_pin(vm); |
| 486 | if (err) |
| 487 | goto out_unlock_outer; |
| 488 | } |
| 489 | |
| 490 | err = xe_validation_ctx_init(ctx: &ctx, val: &vm->xe->val, exec: &exec, |
| 491 | flags: (struct xe_val_flags) {.interruptible = true}); |
| 492 | if (err) |
| 493 | goto out_unlock_outer; |
| 494 | |
| 495 | drm_exec_until_all_locked(&exec) { |
| 496 | bool done = false; |
| 497 | |
| 498 | err = xe_preempt_work_begin(exec: &exec, vm, done: &done); |
| 499 | drm_exec_retry_on_contention(&exec); |
| 500 | xe_validation_retry_on_oom(&ctx, &err); |
| 501 | if (err || done) { |
| 502 | xe_validation_ctx_fini(ctx: &ctx); |
| 503 | goto out_unlock_outer; |
| 504 | } |
| 505 | } |
| 506 | |
| 507 | err = alloc_preempt_fences(vm, list: &preempt_fences, count: &fence_count); |
| 508 | if (err) |
| 509 | goto out_unlock; |
| 510 | |
| 511 | xe_vm_set_validation_exec(vm, exec: &exec); |
| 512 | err = xe_vm_rebind(vm, rebind_worker: true); |
| 513 | xe_vm_set_validation_exec(vm, NULL); |
| 514 | if (err) |
| 515 | goto out_unlock; |
| 516 | |
| 517 | /* Wait on rebinds and munmap style VM unbinds */ |
| 518 | wait = dma_resv_wait_timeout(obj: xe_vm_resv(vm), |
| 519 | usage: DMA_RESV_USAGE_KERNEL, |
| 520 | intr: false, MAX_SCHEDULE_TIMEOUT); |
| 521 | if (wait <= 0) { |
| 522 | err = -ETIME; |
| 523 | goto out_unlock; |
| 524 | } |
| 525 | |
| 526 | #define retry_required(__tries, __vm) \ |
| 527 | (IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT) ? \ |
| 528 | (!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \ |
| 529 | __xe_vm_userptr_needs_repin(__vm)) |
| 530 | |
| 531 | xe_svm_notifier_lock(vm); |
| 532 | if (retry_required(tries, vm)) { |
| 533 | xe_svm_notifier_unlock(vm); |
| 534 | err = -EAGAIN; |
| 535 | goto out_unlock; |
| 536 | } |
| 537 | |
| 538 | #undef retry_required |
| 539 | |
| 540 | spin_lock(lock: &vm->xe->ttm.lru_lock); |
| 541 | ttm_lru_bulk_move_tail(bulk: &vm->lru_bulk_move); |
| 542 | spin_unlock(lock: &vm->xe->ttm.lru_lock); |
| 543 | |
| 544 | /* Point of no return. */ |
| 545 | arm_preempt_fences(vm, list: &preempt_fences); |
| 546 | resume_and_reinstall_preempt_fences(vm, exec: &exec); |
| 547 | xe_svm_notifier_unlock(vm); |
| 548 | |
| 549 | out_unlock: |
| 550 | xe_validation_ctx_fini(ctx: &ctx); |
| 551 | out_unlock_outer: |
| 552 | if (err == -EAGAIN) { |
| 553 | trace_xe_vm_rebind_worker_retry(vm); |
| 554 | |
| 555 | /* |
| 556 | * We can't block in workers on a VF which supports migration |
| 557 | * given this can block the VF post-migration workers from |
| 558 | * getting scheduled. |
| 559 | */ |
| 560 | if (IS_SRIOV_VF(vm->xe) && |
| 561 | xe_sriov_vf_migration_supported(xe: vm->xe)) { |
| 562 | up_write(sem: &vm->lock); |
| 563 | xe_vm_queue_rebind_worker(vm); |
| 564 | return; |
| 565 | } |
| 566 | |
| 567 | goto retry; |
| 568 | } |
| 569 | |
| 570 | if (err) { |
| 571 | drm_warn(&vm->xe->drm, "VM worker error: %d\n", err); |
| 572 | xe_vm_kill(vm, unlocked: true); |
| 573 | } |
| 574 | up_write(sem: &vm->lock); |
| 575 | |
| 576 | free_preempt_fences(list: &preempt_fences); |
| 577 | |
| 578 | trace_xe_vm_rebind_worker_exit(vm); |
| 579 | } |
| 580 | |
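/*
 * Allocate the per-tile arrays of page-table update ops for a xe_vma_ops
 * batch. For an array of binds an allocation failure is reported as -ENOBUFS
 * instead of -ENOMEM, presumably so the caller can split the array and retry.
 */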
| 581 | static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds) |
| 582 | { |
| 583 | int i; |
| 584 | |
| 585 | for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) { |
| 586 | if (!vops->pt_update_ops[i].num_ops) |
| 587 | continue; |
| 588 | |
| 589 | vops->pt_update_ops[i].ops = |
| 590 | kmalloc_array(vops->pt_update_ops[i].num_ops, |
| 591 | sizeof(*vops->pt_update_ops[i].ops), |
| 592 | GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
| 593 | if (!vops->pt_update_ops[i].ops) |
| 594 | return array_of_binds ? -ENOBUFS : -ENOMEM; |
| 595 | } |
| 596 | |
| 597 | return 0; |
| 598 | } |
| 599 | ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO); |
| 600 | |
| 601 | static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op) |
| 602 | { |
| 603 | struct xe_vma *vma; |
| 604 | |
| 605 | vma = gpuva_to_vma(gpuva: op->base.prefetch.va); |
| 606 | |
| 607 | if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma)) |
| 608 | xa_destroy(&op->prefetch_range.range); |
| 609 | } |
| 610 | |
| 611 | static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops) |
| 612 | { |
| 613 | struct xe_vma_op *op; |
| 614 | |
| 615 | if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) |
| 616 | return; |
| 617 | |
| 618 | list_for_each_entry(op, &vops->list, link) |
| 619 | xe_vma_svm_prefetch_op_fini(op); |
| 620 | } |
| 621 | |
| 622 | static void xe_vma_ops_fini(struct xe_vma_ops *vops) |
| 623 | { |
| 624 | int i; |
| 625 | |
| 626 | xe_vma_svm_prefetch_ops_fini(vops); |
| 627 | |
| 628 | for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) |
| 629 | kfree(objp: vops->pt_update_ops[i].ops); |
| 630 | } |
| 631 | |
| 632 | static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val) |
| 633 | { |
| 634 | int i; |
| 635 | |
| 636 | if (!inc_val) |
| 637 | return; |
| 638 | |
| 639 | for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) |
| 640 | if (BIT(i) & tile_mask) |
| 641 | vops->pt_update_ops[i].num_ops += inc_val; |
| 642 | } |
| 643 | |
| 644 | #define XE_VMA_CREATE_MASK ( \ |
| 645 | XE_VMA_READ_ONLY | \ |
| 646 | XE_VMA_DUMPABLE | \ |
| 647 | XE_VMA_SYSTEM_ALLOCATOR | \ |
| 648 | DRM_GPUVA_SPARSE | \ |
| 649 | XE_VMA_MADV_AUTORESET) |
| 650 | |
| 651 | static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma, |
| 652 | u8 tile_mask) |
| 653 | { |
| 654 | INIT_LIST_HEAD(list: &op->link); |
| 655 | op->tile_mask = tile_mask; |
| 656 | op->base.op = DRM_GPUVA_OP_MAP; |
| 657 | op->base.map.va.addr = vma->gpuva.va.addr; |
| 658 | op->base.map.va.range = vma->gpuva.va.range; |
| 659 | op->base.map.gem.obj = vma->gpuva.gem.obj; |
| 660 | op->base.map.gem.offset = vma->gpuva.gem.offset; |
| 661 | op->map.vma = vma; |
| 662 | op->map.immediate = true; |
| 663 | op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK; |
| 664 | } |
| 665 | |
| 666 | static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma, |
| 667 | u8 tile_mask) |
| 668 | { |
| 669 | struct xe_vma_op *op; |
| 670 | |
| 671 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
| 672 | if (!op) |
| 673 | return -ENOMEM; |
| 674 | |
| 675 | xe_vm_populate_rebind(op, vma, tile_mask); |
| 676 | list_add_tail(new: &op->link, head: &vops->list); |
| 677 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask, inc_val: 1); |
| 678 | |
| 679 | return 0; |
| 680 | } |
| 681 | |
| 682 | static struct dma_fence *ops_execute(struct xe_vm *vm, |
| 683 | struct xe_vma_ops *vops); |
| 684 | static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, |
| 685 | struct xe_exec_queue *q, |
| 686 | struct xe_sync_entry *syncs, u32 num_syncs); |
| 687 | |
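/*
 * Rebind all VMAs on the VM's rebind list by building a list of MAP
 * operations and executing it. Called with vm->lock and the VM resv held,
 * either from the exec path or from the preempt rebind worker
 * (@rebind_worker == true). A minimal caller sketch, assuming the locks are
 * already held as in xe_vm_validate_rebind():
 *
 *	err = drm_gpuvm_validate(&vm->gpuvm, exec);
 *	if (!err)
 *		err = xe_vm_rebind(vm, false);
 *
 * On success the rebind list is emptied; on failure the entries are left in
 * place for a later retry.
 */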
| 688 | int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker) |
| 689 | { |
| 690 | struct dma_fence *fence; |
| 691 | struct xe_vma *vma, *next; |
| 692 | struct xe_vma_ops vops; |
| 693 | struct xe_vma_op *op, *next_op; |
| 694 | int err, i; |
| 695 | |
| 696 | lockdep_assert_held(&vm->lock); |
| 697 | if ((xe_vm_in_lr_mode(vm) && !rebind_worker) || |
| 698 | list_empty(head: &vm->rebind_list)) |
| 699 | return 0; |
| 700 | |
| 701 | xe_vma_ops_init(vops: &vops, vm, NULL, NULL, num_syncs: 0); |
| 702 | for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i) |
| 703 | vops.pt_update_ops[i].wait_vm_bookkeep = true; |
| 704 | |
| 705 | xe_vm_assert_held(vm); |
| 706 | list_for_each_entry(vma, &vm->rebind_list, combined_links.rebind) { |
| 707 | xe_assert(vm->xe, vma->tile_present); |
| 708 | |
| 709 | if (rebind_worker) |
| 710 | trace_xe_vma_rebind_worker(vma); |
| 711 | else |
| 712 | trace_xe_vma_rebind_exec(vma); |
| 713 | |
| 714 | err = xe_vm_ops_add_rebind(vops: &vops, vma, |
| 715 | tile_mask: vma->tile_present); |
| 716 | if (err) |
| 717 | goto free_ops; |
| 718 | } |
| 719 | |
| 720 | err = xe_vma_ops_alloc(vops: &vops, array_of_binds: false); |
| 721 | if (err) |
| 722 | goto free_ops; |
| 723 | |
| 724 | fence = ops_execute(vm, vops: &vops); |
| 725 | if (IS_ERR(ptr: fence)) { |
| 726 | err = PTR_ERR(ptr: fence); |
| 727 | } else { |
| 728 | dma_fence_put(fence); |
| 729 | list_for_each_entry_safe(vma, next, &vm->rebind_list, |
| 730 | combined_links.rebind) |
| 731 | list_del_init(entry: &vma->combined_links.rebind); |
| 732 | } |
| 733 | free_ops: |
| 734 | list_for_each_entry_safe(op, next_op, &vops.list, link) { |
| 735 | list_del(entry: &op->link); |
| 736 | kfree(objp: op); |
| 737 | } |
| 738 | xe_vma_ops_fini(vops: &vops); |
| 739 | |
| 740 | return err; |
| 741 | } |
| 742 | |
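/*
 * Rebind a single VMA (fault mode only) on the tiles in @tile_mask using the
 * per-tile migrate exec queues, returning the bind fence. The caller must
 * hold vm->lock and the VM dma-resv.
 */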
| 743 | struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_mask) |
| 744 | { |
| 745 | struct dma_fence *fence = NULL; |
| 746 | struct xe_vma_ops vops; |
| 747 | struct xe_vma_op *op, *next_op; |
| 748 | struct xe_tile *tile; |
| 749 | u8 id; |
| 750 | int err; |
| 751 | |
| 752 | lockdep_assert_held(&vm->lock); |
| 753 | xe_vm_assert_held(vm); |
| 754 | xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); |
| 755 | |
| 756 | xe_vma_ops_init(vops: &vops, vm, NULL, NULL, num_syncs: 0); |
| 757 | vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; |
| 758 | for_each_tile(tile, vm->xe, id) { |
| 759 | vops.pt_update_ops[id].wait_vm_bookkeep = true; |
| 760 | vops.pt_update_ops[tile->id].q = |
| 761 | xe_migrate_exec_queue(migrate: tile->migrate); |
| 762 | } |
| 763 | |
| 764 | err = xe_vm_ops_add_rebind(vops: &vops, vma, tile_mask); |
| 765 | if (err) |
| 766 | return ERR_PTR(error: err); |
| 767 | |
| 768 | err = xe_vma_ops_alloc(vops: &vops, array_of_binds: false); |
| 769 | if (err) { |
| 770 | fence = ERR_PTR(error: err); |
| 771 | goto free_ops; |
| 772 | } |
| 773 | |
| 774 | fence = ops_execute(vm, vops: &vops); |
| 775 | |
| 776 | free_ops: |
| 777 | list_for_each_entry_safe(op, next_op, &vops.list, link) { |
| 778 | list_del(entry: &op->link); |
| 779 | kfree(objp: op); |
| 780 | } |
| 781 | xe_vma_ops_fini(vops: &vops); |
| 782 | |
| 783 | return fence; |
| 784 | } |
| 785 | |
| 786 | static void xe_vm_populate_range_rebind(struct xe_vma_op *op, |
| 787 | struct xe_vma *vma, |
| 788 | struct xe_svm_range *range, |
| 789 | u8 tile_mask) |
| 790 | { |
| 791 | INIT_LIST_HEAD(list: &op->link); |
| 792 | op->tile_mask = tile_mask; |
| 793 | op->base.op = DRM_GPUVA_OP_DRIVER; |
| 794 | op->subop = XE_VMA_SUBOP_MAP_RANGE; |
| 795 | op->map_range.vma = vma; |
| 796 | op->map_range.range = range; |
| 797 | } |
| 798 | |
| 799 | static int |
| 800 | xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops, |
| 801 | struct xe_vma *vma, |
| 802 | struct xe_svm_range *range, |
| 803 | u8 tile_mask) |
| 804 | { |
| 805 | struct xe_vma_op *op; |
| 806 | |
| 807 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
| 808 | if (!op) |
| 809 | return -ENOMEM; |
| 810 | |
| 811 | xe_vm_populate_range_rebind(op, vma, range, tile_mask); |
| 812 | list_add_tail(new: &op->link, head: &vops->list); |
| 813 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask, inc_val: 1); |
| 814 | |
| 815 | return 0; |
| 816 | } |
| 817 | |
| 818 | /** |
| 819 | * xe_vm_range_rebind() - VM range (re)bind |
| 820 | * @vm: The VM which the range belongs to. |
| 821 | * @vma: The VMA which the range belongs to. |
| 822 | * @range: SVM range to rebind. |
| 823 | * @tile_mask: Tile mask to bind the range to. |
| 824 | * |
| 825 | * (re)bind SVM range setting up GPU page tables for the range. |
| 826 | * |
| 827 | * Return: dma fence for rebind to signal completion on success, ERR_PTR on |
| 828 | * failure |
| 829 | */ |
| 830 | struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm, |
| 831 | struct xe_vma *vma, |
| 832 | struct xe_svm_range *range, |
| 833 | u8 tile_mask) |
| 834 | { |
| 835 | struct dma_fence *fence = NULL; |
| 836 | struct xe_vma_ops vops; |
| 837 | struct xe_vma_op *op, *next_op; |
| 838 | struct xe_tile *tile; |
| 839 | u8 id; |
| 840 | int err; |
| 841 | |
| 842 | lockdep_assert_held(&vm->lock); |
| 843 | xe_vm_assert_held(vm); |
| 844 | xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); |
| 845 | xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma)); |
| 846 | |
| 847 | xe_vma_ops_init(vops: &vops, vm, NULL, NULL, num_syncs: 0); |
| 848 | vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT; |
| 849 | for_each_tile(tile, vm->xe, id) { |
| 850 | vops.pt_update_ops[id].wait_vm_bookkeep = true; |
| 851 | vops.pt_update_ops[tile->id].q = |
| 852 | xe_migrate_exec_queue(migrate: tile->migrate); |
| 853 | } |
| 854 | |
| 855 | err = xe_vm_ops_add_range_rebind(vops: &vops, vma, range, tile_mask); |
| 856 | if (err) |
| 857 | return ERR_PTR(error: err); |
| 858 | |
| 859 | err = xe_vma_ops_alloc(vops: &vops, array_of_binds: false); |
| 860 | if (err) { |
| 861 | fence = ERR_PTR(error: err); |
| 862 | goto free_ops; |
| 863 | } |
| 864 | |
| 865 | fence = ops_execute(vm, vops: &vops); |
| 866 | |
| 867 | free_ops: |
| 868 | list_for_each_entry_safe(op, next_op, &vops.list, link) { |
| 869 | list_del(entry: &op->link); |
| 870 | kfree(objp: op); |
| 871 | } |
| 872 | xe_vma_ops_fini(vops: &vops); |
| 873 | |
| 874 | return fence; |
| 875 | } |
| 876 | |
| 877 | static void xe_vm_populate_range_unbind(struct xe_vma_op *op, |
| 878 | struct xe_svm_range *range) |
| 879 | { |
| 880 | INIT_LIST_HEAD(list: &op->link); |
| 881 | op->tile_mask = range->tile_present; |
| 882 | op->base.op = DRM_GPUVA_OP_DRIVER; |
| 883 | op->subop = XE_VMA_SUBOP_UNMAP_RANGE; |
| 884 | op->unmap_range.range = range; |
| 885 | } |
| 886 | |
| 887 | static int |
| 888 | xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops, |
| 889 | struct xe_svm_range *range) |
| 890 | { |
| 891 | struct xe_vma_op *op; |
| 892 | |
| 893 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
| 894 | if (!op) |
| 895 | return -ENOMEM; |
| 896 | |
| 897 | xe_vm_populate_range_unbind(op, range); |
| 898 | list_add_tail(new: &op->link, head: &vops->list); |
| 899 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask: range->tile_present, inc_val: 1); |
| 900 | |
| 901 | return 0; |
| 902 | } |
| 903 | |
| 904 | /** |
| 905 | * xe_vm_range_unbind() - VM range unbind |
| 906 | * @vm: The VM which the range belongs to. |
| 907 | * @range: SVM range to unbind. |
| 908 | * |
| 909 | * Unbind SVM range removing the GPU page tables for the range. |
| 910 | * |
| 911 | * Return: dma fence for unbind to signal completion on success, ERR_PTR on |
| 912 | * failure |
| 913 | */ |
| 914 | struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm, |
| 915 | struct xe_svm_range *range) |
| 916 | { |
| 917 | struct dma_fence *fence = NULL; |
| 918 | struct xe_vma_ops vops; |
| 919 | struct xe_vma_op *op, *next_op; |
| 920 | struct xe_tile *tile; |
| 921 | u8 id; |
| 922 | int err; |
| 923 | |
| 924 | lockdep_assert_held(&vm->lock); |
| 925 | xe_vm_assert_held(vm); |
| 926 | xe_assert(vm->xe, xe_vm_in_fault_mode(vm)); |
| 927 | |
| 928 | if (!range->tile_present) |
| 929 | return dma_fence_get_stub(); |
| 930 | |
| 931 | xe_vma_ops_init(vops: &vops, vm, NULL, NULL, num_syncs: 0); |
| 932 | for_each_tile(tile, vm->xe, id) { |
| 933 | vops.pt_update_ops[id].wait_vm_bookkeep = true; |
| 934 | vops.pt_update_ops[tile->id].q = |
| 935 | xe_migrate_exec_queue(migrate: tile->migrate); |
| 936 | } |
| 937 | |
| 938 | err = xe_vm_ops_add_range_unbind(vops: &vops, range); |
| 939 | if (err) |
| 940 | return ERR_PTR(error: err); |
| 941 | |
| 942 | err = xe_vma_ops_alloc(vops: &vops, array_of_binds: false); |
| 943 | if (err) { |
| 944 | fence = ERR_PTR(error: err); |
| 945 | goto free_ops; |
| 946 | } |
| 947 | |
| 948 | fence = ops_execute(vm, vops: &vops); |
| 949 | |
| 950 | free_ops: |
| 951 | list_for_each_entry_safe(op, next_op, &vops.list, link) { |
| 952 | list_del(entry: &op->link); |
| 953 | kfree(objp: op); |
| 954 | } |
| 955 | xe_vma_ops_fini(vops: &vops); |
| 956 | |
| 957 | return fence; |
| 958 | } |
| 959 | |
| 960 | static void xe_vma_free(struct xe_vma *vma) |
| 961 | { |
| 962 | if (xe_vma_is_userptr(vma)) |
| 963 | kfree(objp: to_userptr_vma(vma)); |
| 964 | else |
| 965 | kfree(objp: vma); |
| 966 | } |
| 967 | |
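/*
 * Allocate and initialize a VMA. Depending on the arguments this is either a
 * BO-backed VMA (takes a BO reference and links into the BO's gpuvm_bo), a
 * userptr VMA (larger allocation, sets up the userptr notifier), or a NULL /
 * CPU-address-mirror VMA. VMAs without a BO take a VM reference instead of a
 * BO reference.
 */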
| 968 | static struct xe_vma *xe_vma_create(struct xe_vm *vm, |
| 969 | struct xe_bo *bo, |
| 970 | u64 bo_offset_or_userptr, |
| 971 | u64 start, u64 end, |
| 972 | struct xe_vma_mem_attr *attr, |
| 973 | unsigned int flags) |
| 974 | { |
| 975 | struct xe_vma *vma; |
| 976 | struct xe_tile *tile; |
| 977 | u8 id; |
| 978 | bool is_null = (flags & DRM_GPUVA_SPARSE); |
| 979 | bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR); |
| 980 | |
| 981 | xe_assert(vm->xe, start < end); |
| 982 | xe_assert(vm->xe, end < vm->size); |
| 983 | |
| 984 | /* |
| 985 | * Allocate and ensure that the xe_vma_is_userptr() return |
| 986 | * matches what was allocated. |
| 987 | */ |
| 988 | if (!bo && !is_null && !is_cpu_addr_mirror) { |
| 989 | struct xe_userptr_vma *uvma = kzalloc(sizeof(*uvma), GFP_KERNEL); |
| 990 | |
| 991 | if (!uvma) |
| 992 | return ERR_PTR(error: -ENOMEM); |
| 993 | |
| 994 | vma = &uvma->vma; |
| 995 | } else { |
| 996 | vma = kzalloc(sizeof(*vma), GFP_KERNEL); |
| 997 | if (!vma) |
| 998 | return ERR_PTR(error: -ENOMEM); |
| 999 | |
| 1000 | if (bo) |
| 1001 | vma->gpuva.gem.obj = &bo->ttm.base; |
| 1002 | } |
| 1003 | |
| 1004 | INIT_LIST_HEAD(list: &vma->combined_links.rebind); |
| 1005 | |
| 1006 | INIT_LIST_HEAD(list: &vma->gpuva.gem.entry); |
| 1007 | vma->gpuva.vm = &vm->gpuvm; |
| 1008 | vma->gpuva.va.addr = start; |
| 1009 | vma->gpuva.va.range = end - start + 1; |
| 1010 | vma->gpuva.flags = flags; |
| 1011 | |
| 1012 | for_each_tile(tile, vm->xe, id) |
| 1013 | vma->tile_mask |= 0x1 << id; |
| 1014 | |
| 1015 | if (vm->xe->info.has_atomic_enable_pte_bit) |
| 1016 | vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT; |
| 1017 | |
| 1018 | vma->attr = *attr; |
| 1019 | |
| 1020 | if (bo) { |
| 1021 | struct drm_gpuvm_bo *vm_bo; |
| 1022 | |
| 1023 | xe_bo_assert_held(bo); |
| 1024 | |
| 1025 | vm_bo = drm_gpuvm_bo_obtain(gpuvm: vma->gpuva.vm, obj: &bo->ttm.base); |
| 1026 | if (IS_ERR(ptr: vm_bo)) { |
| 1027 | xe_vma_free(vma); |
| 1028 | return ERR_CAST(ptr: vm_bo); |
| 1029 | } |
| 1030 | |
| 1031 | drm_gpuvm_bo_extobj_add(vm_bo); |
| 1032 | drm_gem_object_get(obj: &bo->ttm.base); |
| 1033 | vma->gpuva.gem.offset = bo_offset_or_userptr; |
| 1034 | drm_gpuva_link(va: &vma->gpuva, vm_bo); |
| 1035 | drm_gpuvm_bo_put(vm_bo); |
| 1036 | } else /* userptr or null */ { |
| 1037 | if (!is_null && !is_cpu_addr_mirror) { |
| 1038 | struct xe_userptr_vma *uvma = to_userptr_vma(vma); |
| 1039 | u64 size = end - start + 1; |
| 1040 | int err; |
| 1041 | |
| 1042 | vma->gpuva.gem.offset = bo_offset_or_userptr; |
| 1043 | |
| 1044 | err = xe_userptr_setup(uvma, start: xe_vma_userptr(vma), range: size); |
| 1045 | if (err) { |
| 1046 | xe_vma_free(vma); |
| 1047 | return ERR_PTR(error: err); |
| 1048 | } |
| 1049 | } |
| 1050 | |
| 1051 | xe_vm_get(vm); |
| 1052 | } |
| 1053 | |
| 1054 | return vma; |
| 1055 | } |
| 1056 | |
| 1057 | static void xe_vma_destroy_late(struct xe_vma *vma) |
| 1058 | { |
| 1059 | struct xe_vm *vm = xe_vma_vm(vma); |
| 1060 | |
| 1061 | if (vma->ufence) { |
| 1062 | xe_sync_ufence_put(ufence: vma->ufence); |
| 1063 | vma->ufence = NULL; |
| 1064 | } |
| 1065 | |
| 1066 | if (xe_vma_is_userptr(vma)) { |
| 1067 | struct xe_userptr_vma *uvma = to_userptr_vma(vma); |
| 1068 | |
| 1069 | xe_userptr_remove(uvma); |
| 1070 | xe_vm_put(vm); |
| 1071 | } else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) { |
| 1072 | xe_vm_put(vm); |
| 1073 | } else { |
| 1074 | xe_bo_put(bo: xe_vma_bo(vma)); |
| 1075 | } |
| 1076 | |
| 1077 | xe_vma_free(vma); |
| 1078 | } |
| 1079 | |
| 1080 | static void vma_destroy_work_func(struct work_struct *w) |
| 1081 | { |
| 1082 | struct xe_vma *vma = |
| 1083 | container_of(w, struct xe_vma, destroy_work); |
| 1084 | |
| 1085 | xe_vma_destroy_late(vma); |
| 1086 | } |
| 1087 | |
| 1088 | static void vma_destroy_cb(struct dma_fence *fence, |
| 1089 | struct dma_fence_cb *cb) |
| 1090 | { |
| 1091 | struct xe_vma *vma = container_of(cb, struct xe_vma, destroy_cb); |
| 1092 | |
| 1093 | INIT_WORK(&vma->destroy_work, vma_destroy_work_func); |
| 1094 | queue_work(wq: system_unbound_wq, work: &vma->destroy_work); |
| 1095 | } |
| 1096 | |
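/*
 * Tear down a VMA. Requires vm->lock held for write and the VM resv held.
 * If @fence is non-NULL, the final cleanup (xe_vma_destroy_late()) is
 * deferred to a worker once the fence signals; otherwise it runs immediately.
 */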
| 1097 | static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence) |
| 1098 | { |
| 1099 | struct xe_vm *vm = xe_vma_vm(vma); |
| 1100 | |
| 1101 | lockdep_assert_held_write(&vm->lock); |
| 1102 | xe_assert(vm->xe, list_empty(&vma->combined_links.destroy)); |
| 1103 | |
| 1104 | if (xe_vma_is_userptr(vma)) { |
| 1105 | xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED); |
| 1106 | xe_userptr_destroy(uvma: to_userptr_vma(vma)); |
| 1107 | } else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) { |
| 1108 | xe_bo_assert_held(bo: xe_vma_bo(vma)); |
| 1109 | |
| 1110 | drm_gpuva_unlink(va: &vma->gpuva); |
| 1111 | } |
| 1112 | |
| 1113 | xe_vm_assert_held(vm); |
| 1114 | if (fence) { |
| 1115 | int ret = dma_fence_add_callback(fence, cb: &vma->destroy_cb, |
| 1116 | func: vma_destroy_cb); |
| 1117 | |
| 1118 | if (ret) { |
| 1119 | XE_WARN_ON(ret != -ENOENT); |
| 1120 | xe_vma_destroy_late(vma); |
| 1121 | } |
| 1122 | } else { |
| 1123 | xe_vma_destroy_late(vma); |
| 1124 | } |
| 1125 | } |
| 1126 | |
| 1127 | /** |
| 1128 | * xe_vm_lock_vma() - drm_exec utility to lock a vma |
| 1129 | * @exec: The drm_exec object we're currently locking for. |
| 1130 | * @vma: The vma for which we want to lock the vm resv and any attached |
| 1131 | * object's resv. |
| 1132 | * |
| 1133 | * Return: 0 on success, negative error code on error. In particular |
| 1134 | * may return -EDEADLK on WW transaction contention and -EINTR if |
| 1135 | * an interruptible wait is terminated by a signal. |
| 1136 | */ |
| 1137 | int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma) |
| 1138 | { |
| 1139 | struct xe_vm *vm = xe_vma_vm(vma); |
| 1140 | struct xe_bo *bo = xe_vma_bo(vma); |
| 1141 | int err; |
| 1142 | |
| 1143 | XE_WARN_ON(!vm); |
| 1144 | |
| 1145 | err = drm_exec_lock_obj(exec, obj: xe_vm_obj(vm)); |
| 1146 | if (!err && bo && !bo->vm) |
| 1147 | err = drm_exec_lock_obj(exec, obj: &bo->ttm.base); |
| 1148 | |
| 1149 | return err; |
| 1150 | } |
| 1151 | |
| 1152 | static void xe_vma_destroy_unlocked(struct xe_vma *vma) |
| 1153 | { |
| 1154 | struct xe_device *xe = xe_vma_vm(vma)->xe; |
| 1155 | struct xe_validation_ctx ctx; |
| 1156 | struct drm_exec exec; |
| 1157 | int err = 0; |
| 1158 | |
| 1159 | xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) { |
| 1160 | err = xe_vm_lock_vma(exec: &exec, vma); |
| 1161 | drm_exec_retry_on_contention(&exec); |
| 1162 | if (XE_WARN_ON(err)) |
| 1163 | break; |
| 1164 | xe_vma_destroy(vma, NULL); |
| 1165 | } |
| 1166 | xe_assert(xe, !err); |
| 1167 | } |
| 1168 | |
| 1169 | struct xe_vma * |
| 1170 | xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range) |
| 1171 | { |
| 1172 | struct drm_gpuva *gpuva; |
| 1173 | |
| 1174 | lockdep_assert_held(&vm->lock); |
| 1175 | |
| 1176 | if (xe_vm_is_closed_or_banned(vm)) |
| 1177 | return NULL; |
| 1178 | |
| 1179 | xe_assert(vm->xe, start + range <= vm->size); |
| 1180 | |
| 1181 | gpuva = drm_gpuva_find_first(gpuvm: &vm->gpuvm, addr: start, range); |
| 1182 | |
| 1183 | return gpuva ? gpuva_to_vma(gpuva) : NULL; |
| 1184 | } |
| 1185 | |
| 1186 | static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma) |
| 1187 | { |
| 1188 | int err; |
| 1189 | |
| 1190 | xe_assert(vm->xe, xe_vma_vm(vma) == vm); |
| 1191 | lockdep_assert_held(&vm->lock); |
| 1192 | |
| 1193 | mutex_lock(&vm->snap_mutex); |
| 1194 | err = drm_gpuva_insert(gpuvm: &vm->gpuvm, va: &vma->gpuva); |
| 1195 | mutex_unlock(lock: &vm->snap_mutex); |
| 1196 | XE_WARN_ON(err); /* Shouldn't be possible */ |
| 1197 | |
| 1198 | return err; |
| 1199 | } |
| 1200 | |
| 1201 | static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma) |
| 1202 | { |
| 1203 | xe_assert(vm->xe, xe_vma_vm(vma) == vm); |
| 1204 | lockdep_assert_held(&vm->lock); |
| 1205 | |
| 1206 | mutex_lock(&vm->snap_mutex); |
| 1207 | drm_gpuva_remove(va: &vma->gpuva); |
| 1208 | mutex_unlock(lock: &vm->snap_mutex); |
| 1209 | if (vm->usm.last_fault_vma == vma) |
| 1210 | vm->usm.last_fault_vma = NULL; |
| 1211 | } |
| 1212 | |
| 1213 | static struct drm_gpuva_op *xe_vm_op_alloc(void) |
| 1214 | { |
| 1215 | struct xe_vma_op *op; |
| 1216 | |
| 1217 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
| 1218 | |
| 1219 | if (unlikely(!op)) |
| 1220 | return NULL; |
| 1221 | |
| 1222 | return &op->base; |
| 1223 | } |
| 1224 | |
| 1225 | static void xe_vm_free(struct drm_gpuvm *gpuvm); |
| 1226 | |
| 1227 | static const struct drm_gpuvm_ops gpuvm_ops = { |
| 1228 | .op_alloc = xe_vm_op_alloc, |
| 1229 | .vm_bo_validate = xe_gpuvm_validate, |
| 1230 | .vm_free = xe_vm_free, |
| 1231 | }; |
| 1232 | |
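/*
 * Helpers encoding a PAT index into the PPGTT PDE/PTE PAT bits. Non-leaf
 * entries only carry two PAT bits, while leaf PTEs carry up to five (PAT0/1/2,
 * PAT3 on XeLPG+, PAT4 on Xe2), with PAT bit 2 living at a different position
 * for the PDE/PDPE levels.
 */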
| 1233 | static u64 pde_encode_pat_index(u16 pat_index) |
| 1234 | { |
| 1235 | u64 pte = 0; |
| 1236 | |
| 1237 | if (pat_index & BIT(0)) |
| 1238 | pte |= XE_PPGTT_PTE_PAT0; |
| 1239 | |
| 1240 | if (pat_index & BIT(1)) |
| 1241 | pte |= XE_PPGTT_PTE_PAT1; |
| 1242 | |
| 1243 | return pte; |
| 1244 | } |
| 1245 | |
| 1246 | static u64 pte_encode_pat_index(u16 pat_index, u32 pt_level) |
| 1247 | { |
| 1248 | u64 pte = 0; |
| 1249 | |
| 1250 | if (pat_index & BIT(0)) |
| 1251 | pte |= XE_PPGTT_PTE_PAT0; |
| 1252 | |
| 1253 | if (pat_index & BIT(1)) |
| 1254 | pte |= XE_PPGTT_PTE_PAT1; |
| 1255 | |
| 1256 | if (pat_index & BIT(2)) { |
| 1257 | if (pt_level) |
| 1258 | pte |= XE_PPGTT_PDE_PDPE_PAT2; |
| 1259 | else |
| 1260 | pte |= XE_PPGTT_PTE_PAT2; |
| 1261 | } |
| 1262 | |
| 1263 | if (pat_index & BIT(3)) |
| 1264 | pte |= XELPG_PPGTT_PTE_PAT3; |
| 1265 | |
| 1266 | if (pat_index & (BIT(4))) |
| 1267 | pte |= XE2_PPGTT_PTE_PAT4; |
| 1268 | |
| 1269 | return pte; |
| 1270 | } |
| 1271 | |
| 1272 | static u64 pte_encode_ps(u32 pt_level) |
| 1273 | { |
| 1274 | XE_WARN_ON(pt_level > MAX_HUGEPTE_LEVEL); |
| 1275 | |
| 1276 | if (pt_level == 1) |
| 1277 | return XE_PDE_PS_2M; |
| 1278 | else if (pt_level == 2) |
| 1279 | return XE_PDPE_PS_1G; |
| 1280 | |
| 1281 | return 0; |
| 1282 | } |
| 1283 | |
| 1284 | static u16 pde_pat_index(struct xe_bo *bo) |
| 1285 | { |
| 1286 | struct xe_device *xe = xe_bo_device(bo); |
| 1287 | u16 pat_index; |
| 1288 | |
| 1289 | /* |
| 1290 | * We only have two bits to encode the PAT index in non-leaf nodes, but |
| 1291 | * these only point to other paging structures so we only need a minimal |
| 1292 | * selection of options. The user PAT index is only for encoding leaf |
| 1293 | * nodes, where we have use of more bits to do the encoding. The |
| 1294 | * non-leaf nodes are instead under driver control so the chosen index |
| 1295 | * here should be distinct from the user PAT index. Also the |
| 1296 | * corresponding coherency of the PAT index should be tied to the |
| 1297 | * allocation type of the page table (or at least we should pick |
| 1298 | * something which is always safe). |
| 1299 | */ |
| 1300 | if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached) |
| 1301 | pat_index = xe->pat.idx[XE_CACHE_WB]; |
| 1302 | else |
| 1303 | pat_index = xe->pat.idx[XE_CACHE_NONE]; |
| 1304 | |
| 1305 | xe_assert(xe, pat_index <= 3); |
| 1306 | |
| 1307 | return pat_index; |
| 1308 | } |
| 1309 | |
| 1310 | static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset) |
| 1311 | { |
| 1312 | u64 pde; |
| 1313 | |
| 1314 | pde = xe_bo_addr(bo, offset: bo_offset, XE_PAGE_SIZE); |
| 1315 | pde |= XE_PAGE_PRESENT | XE_PAGE_RW; |
| 1316 | pde |= pde_encode_pat_index(pat_index: pde_pat_index(bo)); |
| 1317 | |
| 1318 | return pde; |
| 1319 | } |
| 1320 | |
| 1321 | static u64 xelp_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, |
| 1322 | u16 pat_index, u32 pt_level) |
| 1323 | { |
| 1324 | u64 pte; |
| 1325 | |
| 1326 | pte = xe_bo_addr(bo, offset: bo_offset, XE_PAGE_SIZE); |
| 1327 | pte |= XE_PAGE_PRESENT | XE_PAGE_RW; |
| 1328 | pte |= pte_encode_pat_index(pat_index, pt_level); |
| 1329 | pte |= pte_encode_ps(pt_level); |
| 1330 | |
| 1331 | if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo)) |
| 1332 | pte |= XE_PPGTT_PTE_DM; |
| 1333 | |
| 1334 | return pte; |
| 1335 | } |
| 1336 | |
| 1337 | static u64 xelp_pte_encode_vma(u64 pte, struct xe_vma *vma, |
| 1338 | u16 pat_index, u32 pt_level) |
| 1339 | { |
| 1340 | pte |= XE_PAGE_PRESENT; |
| 1341 | |
| 1342 | if (likely(!xe_vma_read_only(vma))) |
| 1343 | pte |= XE_PAGE_RW; |
| 1344 | |
| 1345 | pte |= pte_encode_pat_index(pat_index, pt_level); |
| 1346 | pte |= pte_encode_ps(pt_level); |
| 1347 | |
| 1348 | if (unlikely(xe_vma_is_null(vma))) |
| 1349 | pte |= XE_PTE_NULL; |
| 1350 | |
| 1351 | return pte; |
| 1352 | } |
| 1353 | |
| 1354 | static u64 xelp_pte_encode_addr(struct xe_device *xe, u64 addr, |
| 1355 | u16 pat_index, |
| 1356 | u32 pt_level, bool devmem, u64 flags) |
| 1357 | { |
| 1358 | u64 pte; |
| 1359 | |
| 1360 | /* Avoid passing random bits directly as flags */ |
| 1361 | xe_assert(xe, !(flags & ~XE_PTE_PS64)); |
| 1362 | |
| 1363 | pte = addr; |
| 1364 | pte |= XE_PAGE_PRESENT | XE_PAGE_RW; |
| 1365 | pte |= pte_encode_pat_index(pat_index, pt_level); |
| 1366 | pte |= pte_encode_ps(pt_level); |
| 1367 | |
| 1368 | if (devmem) |
| 1369 | pte |= XE_PPGTT_PTE_DM; |
| 1370 | |
| 1371 | pte |= flags; |
| 1372 | |
| 1373 | return pte; |
| 1374 | } |
| 1375 | |
| 1376 | static const struct xe_pt_ops xelp_pt_ops = { |
| 1377 | .pte_encode_bo = xelp_pte_encode_bo, |
| 1378 | .pte_encode_vma = xelp_pte_encode_vma, |
| 1379 | .pte_encode_addr = xelp_pte_encode_addr, |
| 1380 | .pde_encode_bo = xelp_pde_encode_bo, |
| 1381 | }; |
| 1382 | |
| 1383 | static void vm_destroy_work_func(struct work_struct *w); |
| 1384 | |
| 1385 | /** |
| 1386 | * xe_vm_create_scratch() - Setup a scratch memory pagetable tree for the |
| 1387 | * given tile and vm. |
| 1388 | * @xe: xe device. |
| 1389 | * @tile: tile to set up for. |
| 1390 | * @vm: vm to set up for. |
| 1391 | * @exec: The struct drm_exec object used to lock the vm resv. |
| 1392 | * |
| 1393 | * Sets up a pagetable tree with one page-table per level and a single |
| 1394 | * leaf PTE. All pagetable entries point to the single page-table or, |
| 1395 | * for MAX_HUGEPTE_LEVEL, a NULL huge PTE that returns zero on reads |
| 1396 | * while writes become NOPs. |
| 1397 | * |
| 1398 | * Return: 0 on success, negative error code on error. |
| 1399 | */ |
| 1400 | static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile, |
| 1401 | struct xe_vm *vm, struct drm_exec *exec) |
| 1402 | { |
| 1403 | u8 id = tile->id; |
| 1404 | int i; |
| 1405 | |
| 1406 | for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) { |
| 1407 | vm->scratch_pt[id][i] = xe_pt_create(vm, tile, level: i, exec); |
| 1408 | if (IS_ERR(ptr: vm->scratch_pt[id][i])) { |
| 1409 | int err = PTR_ERR(ptr: vm->scratch_pt[id][i]); |
| 1410 | |
| 1411 | vm->scratch_pt[id][i] = NULL; |
| 1412 | return err; |
| 1413 | } |
| 1414 | xe_pt_populate_empty(tile, vm, pt: vm->scratch_pt[id][i]); |
| 1415 | } |
| 1416 | |
| 1417 | return 0; |
| 1418 | } |
| 1419 | ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO); |
| 1420 | |
| 1421 | static void xe_vm_free_scratch(struct xe_vm *vm) |
| 1422 | { |
| 1423 | struct xe_tile *tile; |
| 1424 | u8 id; |
| 1425 | |
| 1426 | if (!xe_vm_has_scratch(vm)) |
| 1427 | return; |
| 1428 | |
| 1429 | for_each_tile(tile, vm->xe, id) { |
| 1430 | u32 i; |
| 1431 | |
| 1432 | if (!vm->pt_root[id]) |
| 1433 | continue; |
| 1434 | |
| 1435 | for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; ++i) |
| 1436 | if (vm->scratch_pt[id][i]) |
| 1437 | xe_pt_destroy(pt: vm->scratch_pt[id][i], flags: vm->flags, NULL); |
| 1438 | } |
| 1439 | } |
| 1440 | |
| 1441 | static void xe_vm_pt_destroy(struct xe_vm *vm) |
| 1442 | { |
| 1443 | struct xe_tile *tile; |
| 1444 | u8 id; |
| 1445 | |
| 1446 | xe_vm_assert_held(vm); |
| 1447 | |
| 1448 | for_each_tile(tile, vm->xe, id) { |
| 1449 | if (vm->pt_root[id]) { |
| 1450 | xe_pt_destroy(pt: vm->pt_root[id], flags: vm->flags, NULL); |
| 1451 | vm->pt_root[id] = NULL; |
| 1452 | } |
| 1453 | } |
| 1454 | } |
| 1455 | |
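/*
 * Create a VM: allocate the xe_vm, set up locks, SVM state and the gpuvm,
 * then, under a validation transaction, allocate the per-tile page-table
 * roots (plus scratch tables if requested). Non-migration VMs also get one
 * bind exec queue per tile, and user VMs on platforms with ASIDs get an ASID
 * allocated. Errors after the gpuvm is initialized funnel through
 * xe_vm_close_and_put().
 */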
| 1456 | struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef) |
| 1457 | { |
| 1458 | struct drm_gem_object *vm_resv_obj; |
| 1459 | struct xe_validation_ctx ctx; |
| 1460 | struct drm_exec exec; |
| 1461 | struct xe_vm *vm; |
| 1462 | int err; |
| 1463 | struct xe_tile *tile; |
| 1464 | u8 id; |
| 1465 | |
| 1466 | /* |
| 1467 | * Since the GSCCS is not user-accessible, we don't expect a GSC VM to |
| 1468 | * ever be in faulting mode. |
| 1469 | */ |
| 1470 | xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE))); |
| 1471 | |
| 1472 | vm = kzalloc(sizeof(*vm), GFP_KERNEL); |
| 1473 | if (!vm) |
| 1474 | return ERR_PTR(error: -ENOMEM); |
| 1475 | |
| 1476 | vm->xe = xe; |
| 1477 | |
| 1478 | vm->size = 1ull << xe->info.va_bits; |
| 1479 | vm->flags = flags; |
| 1480 | |
| 1481 | if (xef) |
| 1482 | vm->xef = xe_file_get(xef); |
| 1483 | /* |
| 1484 | * GSC VMs are kernel-owned, only used for PXP ops and can sometimes be |
| 1485 | * manipulated under the PXP mutex. However, the PXP mutex can be taken |
| 1486 | * under a user-VM lock when the PXP session is started at exec_queue |
| 1487 | * creation time. Those are different VMs and therefore there is no risk |
| 1488 | * of deadlock, but we need to tell lockdep that this is the case or it |
| 1489 | * will print a warning. |
| 1490 | */ |
| 1491 | if (flags & XE_VM_FLAG_GSC) { |
| 1492 | static struct lock_class_key gsc_vm_key; |
| 1493 | |
| 1494 | __init_rwsem(sem: &vm->lock, name: "gsc_vm", key: &gsc_vm_key); |
| 1495 | } else { |
| 1496 | init_rwsem(&vm->lock); |
| 1497 | } |
| 1498 | mutex_init(&vm->snap_mutex); |
| 1499 | |
| 1500 | INIT_LIST_HEAD(list: &vm->rebind_list); |
| 1501 | |
| 1502 | INIT_LIST_HEAD(list: &vm->userptr.repin_list); |
| 1503 | INIT_LIST_HEAD(list: &vm->userptr.invalidated); |
| 1504 | spin_lock_init(&vm->userptr.invalidated_lock); |
| 1505 | |
| 1506 | ttm_lru_bulk_move_init(bulk: &vm->lru_bulk_move); |
| 1507 | |
| 1508 | INIT_WORK(&vm->destroy_work, vm_destroy_work_func); |
| 1509 | |
| 1510 | INIT_LIST_HEAD(list: &vm->preempt.exec_queues); |
| 1511 | if (flags & XE_VM_FLAG_FAULT_MODE) |
| 1512 | vm->preempt.min_run_period_ms = 0; |
| 1513 | else |
| 1514 | vm->preempt.min_run_period_ms = 5; |
| 1515 | |
| 1516 | for_each_tile(tile, xe, id) |
| 1517 | xe_range_fence_tree_init(tree: &vm->rftree[id]); |
| 1518 | |
| 1519 | vm->pt_ops = &xelp_pt_ops; |
| 1520 | |
| 1521 | /* |
| 1522 | * Long-running workloads are not protected by the scheduler references. |
| 1523 | * By design, run_job for long-running workloads returns NULL and the |
| 1524 | * scheduler drops all of its references, hence the VM must be protected |
| 1525 | * explicitly for this case. |
| 1526 | */ |
| 1527 | if (flags & XE_VM_FLAG_LR_MODE) { |
| 1528 | INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); |
| 1529 | xe_pm_runtime_get_noresume(xe); |
| 1530 | INIT_LIST_HEAD(list: &vm->preempt.pm_activate_link); |
| 1531 | } |
| 1532 | |
| 1533 | err = xe_svm_init(vm); |
| 1534 | if (err) |
| 1535 | goto err_no_resv; |
| 1536 | |
| 1537 | vm_resv_obj = drm_gpuvm_resv_object_alloc(drm: &xe->drm); |
| 1538 | if (!vm_resv_obj) { |
| 1539 | err = -ENOMEM; |
| 1540 | goto err_svm_fini; |
| 1541 | } |
| 1542 | |
| 1543 | drm_gpuvm_init(gpuvm: &vm->gpuvm, name: "Xe VM" , flags: DRM_GPUVM_RESV_PROTECTED, drm: &xe->drm, |
| 1544 | r_obj: vm_resv_obj, start_offset: 0, range: vm->size, reserve_offset: 0, reserve_range: 0, ops: &gpuvm_ops); |
| 1545 | |
| 1546 | drm_gem_object_put(obj: vm_resv_obj); |
| 1547 | |
| 1548 | err = 0; |
| 1549 | xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true}, |
| 1550 | err) { |
| 1551 | err = xe_vm_drm_exec_lock(vm, exec: &exec); |
| 1552 | drm_exec_retry_on_contention(&exec); |
| 1553 | |
| 1554 | if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) |
| 1555 | vm->flags |= XE_VM_FLAG_64K; |
| 1556 | |
| 1557 | for_each_tile(tile, xe, id) { |
| 1558 | if (flags & XE_VM_FLAG_MIGRATION && |
| 1559 | tile->id != XE_VM_FLAG_TILE_ID(flags)) |
| 1560 | continue; |
| 1561 | |
| 1562 | vm->pt_root[id] = xe_pt_create(vm, tile, level: xe->info.vm_max_level, |
| 1563 | exec: &exec); |
| 1564 | if (IS_ERR(ptr: vm->pt_root[id])) { |
| 1565 | err = PTR_ERR(ptr: vm->pt_root[id]); |
| 1566 | vm->pt_root[id] = NULL; |
| 1567 | xe_vm_pt_destroy(vm); |
| 1568 | drm_exec_retry_on_contention(&exec); |
| 1569 | xe_validation_retry_on_oom(&ctx, &err); |
| 1570 | break; |
| 1571 | } |
| 1572 | } |
| 1573 | if (err) |
| 1574 | break; |
| 1575 | |
| 1576 | if (xe_vm_has_scratch(vm)) { |
| 1577 | for_each_tile(tile, xe, id) { |
| 1578 | if (!vm->pt_root[id]) |
| 1579 | continue; |
| 1580 | |
| 1581 | err = xe_vm_create_scratch(xe, tile, vm, exec: &exec); |
| 1582 | if (err) { |
| 1583 | xe_vm_free_scratch(vm); |
| 1584 | xe_vm_pt_destroy(vm); |
| 1585 | drm_exec_retry_on_contention(&exec); |
| 1586 | xe_validation_retry_on_oom(&ctx, &err); |
| 1587 | break; |
| 1588 | } |
| 1589 | } |
| 1590 | if (err) |
| 1591 | break; |
| 1592 | vm->batch_invalidate_tlb = true; |
| 1593 | } |
| 1594 | |
| 1595 | if (vm->flags & XE_VM_FLAG_LR_MODE) { |
| 1596 | INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func); |
| 1597 | vm->batch_invalidate_tlb = false; |
| 1598 | } |
| 1599 | |
| 1600 | /* Fill pt_root after allocating scratch tables */ |
| 1601 | for_each_tile(tile, xe, id) { |
| 1602 | if (!vm->pt_root[id]) |
| 1603 | continue; |
| 1604 | |
| 1605 | xe_pt_populate_empty(tile, vm, pt: vm->pt_root[id]); |
| 1606 | } |
| 1607 | } |
| 1608 | if (err) |
| 1609 | goto err_close; |
| 1610 | |
| 1611 | /* Kernel migration VM shouldn't have a circular loop.. */ |
| 1612 | if (!(flags & XE_VM_FLAG_MIGRATION)) { |
| 1613 | for_each_tile(tile, xe, id) { |
| 1614 | struct xe_exec_queue *q; |
| 1615 | u32 create_flags = EXEC_QUEUE_FLAG_VM; |
| 1616 | |
| 1617 | if (!vm->pt_root[id]) |
| 1618 | continue; |
| 1619 | |
| 1620 | q = xe_exec_queue_create_bind(xe, tile, user_vm: vm, flags: create_flags, extensions: 0); |
| 1621 | if (IS_ERR(ptr: q)) { |
| 1622 | err = PTR_ERR(ptr: q); |
| 1623 | goto err_close; |
| 1624 | } |
| 1625 | vm->q[id] = q; |
| 1626 | } |
| 1627 | } |
| 1628 | |
| 1629 | if (xef && xe->info.has_asid) { |
| 1630 | u32 asid; |
| 1631 | |
| 1632 | down_write(sem: &xe->usm.lock); |
| 1633 | err = xa_alloc_cyclic(xa: &xe->usm.asid_to_vm, id: &asid, entry: vm, |
| 1634 | XA_LIMIT(1, XE_MAX_ASID - 1), |
| 1635 | next: &xe->usm.next_asid, GFP_KERNEL); |
| 1636 | up_write(sem: &xe->usm.lock); |
| 1637 | if (err < 0) |
| 1638 | goto err_close; |
| 1639 | |
| 1640 | vm->usm.asid = asid; |
| 1641 | } |
| 1642 | |
| 1643 | trace_xe_vm_create(vm); |
| 1644 | |
| 1645 | return vm; |
| 1646 | |
| 1647 | err_close: |
| 1648 | xe_vm_close_and_put(vm); |
| 1649 | return ERR_PTR(error: err); |
| 1650 | |
| 1651 | err_svm_fini: |
| 1652 | if (flags & XE_VM_FLAG_FAULT_MODE) { |
| 1653 | vm->size = 0; /* close the vm */ |
| 1654 | xe_svm_fini(vm); |
| 1655 | } |
| 1656 | err_no_resv: |
| 1657 | mutex_destroy(lock: &vm->snap_mutex); |
| 1658 | for_each_tile(tile, xe, id) |
| 1659 | xe_range_fence_tree_fini(tree: &vm->rftree[id]); |
| 1660 | ttm_lru_bulk_move_fini(bdev: &xe->ttm, bulk: &vm->lru_bulk_move); |
| 1661 | if (vm->xef) |
| 1662 | xe_file_put(xef: vm->xef); |
| 1663 | kfree(objp: vm); |
| 1664 | if (flags & XE_VM_FLAG_LR_MODE) |
| 1665 | xe_pm_runtime_put(xe); |
| 1666 | return ERR_PTR(error: err); |
| 1667 | } |
| 1668 | |
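/*
 * First phase of VM destruction: mark the VM closed (size = 0) and, if the
 * device is still present, wait for pending binds, clear the page-table
 * roots and issue a TLB invalidation for the VM so the hardware can no
 * longer reach its mappings.
 */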
| 1669 | static void xe_vm_close(struct xe_vm *vm) |
| 1670 | { |
| 1671 | struct xe_device *xe = vm->xe; |
| 1672 | bool bound; |
| 1673 | int idx; |
| 1674 | |
| 1675 | bound = drm_dev_enter(dev: &xe->drm, idx: &idx); |
| 1676 | |
| 1677 | down_write(sem: &vm->lock); |
| 1678 | if (xe_vm_in_fault_mode(vm)) |
| 1679 | xe_svm_notifier_lock(vm); |
| 1680 | |
| 1681 | vm->size = 0; |
| 1682 | |
| 1683 | if (!((vm->flags & XE_VM_FLAG_MIGRATION))) { |
| 1684 | struct xe_tile *tile; |
| 1685 | struct xe_gt *gt; |
| 1686 | u8 id; |
| 1687 | |
| 1688 | /* Wait for pending binds */ |
| 1689 | dma_resv_wait_timeout(obj: xe_vm_resv(vm), |
| 1690 | usage: DMA_RESV_USAGE_BOOKKEEP, |
| 1691 | intr: false, MAX_SCHEDULE_TIMEOUT); |
| 1692 | |
| 1693 | if (bound) { |
| 1694 | for_each_tile(tile, xe, id) |
| 1695 | if (vm->pt_root[id]) |
| 1696 | xe_pt_clear(xe, pt: vm->pt_root[id]); |
| 1697 | |
| 1698 | for_each_gt(gt, xe, id) |
| 1699 | xe_tlb_inval_vm(tlb_inval: >->tlb_inval, vm); |
| 1700 | } |
| 1701 | } |
| 1702 | |
| 1703 | if (xe_vm_in_fault_mode(vm)) |
| 1704 | xe_svm_notifier_unlock(vm); |
| 1705 | up_write(sem: &vm->lock); |
| 1706 | |
| 1707 | if (bound) |
| 1708 | drm_dev_exit(idx); |
| 1709 | } |
| 1710 | |
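/**
 * xe_vm_close_and_put() - Close a VM and drop the creation reference
 * @vm: The VM to close and put.
 *
 * Closes the VM, kills and releases its bind exec queues, removes and
 * destroys all VMAs, frees the scratch and root page tables, releases the
 * ASID and finally drops the reference obtained at VM creation. Must not be
 * called while preempt fence exec queues are still registered with the VM.
 */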
| 1711 | void xe_vm_close_and_put(struct xe_vm *vm) |
| 1712 | { |
| 1713 | LIST_HEAD(contested); |
| 1714 | struct xe_device *xe = vm->xe; |
| 1715 | struct xe_tile *tile; |
| 1716 | struct xe_vma *vma, *next_vma; |
| 1717 | struct drm_gpuva *gpuva, *next; |
| 1718 | u8 id; |
| 1719 | |
| 1720 | xe_assert(xe, !vm->preempt.num_exec_queues); |
| 1721 | |
| 1722 | xe_vm_close(vm); |
| 1723 | if (xe_vm_in_preempt_fence_mode(vm)) { |
| 1724 | mutex_lock(&xe->rebind_resume_lock); |
| 1725 | list_del_init(entry: &vm->preempt.pm_activate_link); |
| 1726 | mutex_unlock(lock: &xe->rebind_resume_lock); |
| 1727 | flush_work(work: &vm->preempt.rebind_work); |
| 1728 | } |
| 1729 | if (xe_vm_in_fault_mode(vm)) |
| 1730 | xe_svm_close(vm); |
| 1731 | |
| 1732 | down_write(sem: &vm->lock); |
| 1733 | for_each_tile(tile, xe, id) { |
| 1734 | if (vm->q[id]) { |
| 1735 | int i; |
| 1736 | |
| 1737 | xe_exec_queue_last_fence_put(e: vm->q[id], vm); |
| 1738 | for_each_tlb_inval(i) |
| 1739 | xe_exec_queue_tlb_inval_last_fence_put(q: vm->q[id], vm, type: i); |
| 1740 | } |
| 1741 | } |
| 1742 | up_write(sem: &vm->lock); |
| 1743 | |
| 1744 | for_each_tile(tile, xe, id) { |
| 1745 | if (vm->q[id]) { |
| 1746 | xe_exec_queue_kill(q: vm->q[id]); |
| 1747 | xe_exec_queue_put(q: vm->q[id]); |
| 1748 | vm->q[id] = NULL; |
| 1749 | } |
| 1750 | } |
| 1751 | |
| 1752 | down_write(sem: &vm->lock); |
| 1753 | xe_vm_lock(vm, intr: false); |
| 1754 | drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) { |
| 1755 | vma = gpuva_to_vma(gpuva); |
| 1756 | |
| 1757 | if (xe_vma_has_no_bo(vma)) { |
| 1758 | xe_svm_notifier_lock(vm); |
| 1759 | vma->gpuva.flags |= XE_VMA_DESTROYED; |
| 1760 | xe_svm_notifier_unlock(vm); |
| 1761 | } |
| 1762 | |
| 1763 | xe_vm_remove_vma(vm, vma); |
| 1764 | |
		/* Easy case: no BO or a VM-private BO, destroy the VMA right away */
| 1766 | if (xe_vma_has_no_bo(vma) || xe_vma_bo(vma)->vm) { |
| 1767 | list_del_init(entry: &vma->combined_links.rebind); |
| 1768 | xe_vma_destroy(vma, NULL); |
| 1769 | continue; |
| 1770 | } |
| 1771 | |
| 1772 | list_move_tail(list: &vma->combined_links.destroy, head: &contested); |
| 1773 | vma->gpuva.flags |= XE_VMA_DESTROYED; |
| 1774 | } |
| 1775 | |
| 1776 | /* |
| 1777 | * All vm operations will add shared fences to resv. |
| 1778 | * The only exception is eviction for a shared object, |
| 1779 | * but even so, the unbind when evicted would still |
| 1780 | * install a fence to resv. Hence it's safe to |
| 1781 | * destroy the pagetables immediately. |
| 1782 | */ |
| 1783 | xe_vm_free_scratch(vm); |
| 1784 | xe_vm_pt_destroy(vm); |
| 1785 | xe_vm_unlock(vm); |
| 1786 | |
| 1787 | /* |
| 1788 | * VM is now dead, cannot re-add nodes to vm->vmas if it's NULL |
| 1789 | * Since we hold a refcount to the bo, we can remove and free |
| 1790 | * the members safely without locking. |
| 1791 | */ |
| 1792 | list_for_each_entry_safe(vma, next_vma, &contested, |
| 1793 | combined_links.destroy) { |
| 1794 | list_del_init(entry: &vma->combined_links.destroy); |
| 1795 | xe_vma_destroy_unlocked(vma); |
| 1796 | } |
| 1797 | |
| 1798 | xe_svm_fini(vm); |
| 1799 | |
| 1800 | up_write(sem: &vm->lock); |
| 1801 | |
| 1802 | down_write(sem: &xe->usm.lock); |
| 1803 | if (vm->usm.asid) { |
| 1804 | void *lookup; |
| 1805 | |
| 1806 | xe_assert(xe, xe->info.has_asid); |
| 1807 | xe_assert(xe, !(vm->flags & XE_VM_FLAG_MIGRATION)); |
| 1808 | |
| 1809 | lookup = xa_erase(&xe->usm.asid_to_vm, index: vm->usm.asid); |
| 1810 | xe_assert(xe, lookup == vm); |
| 1811 | } |
| 1812 | up_write(sem: &xe->usm.lock); |
| 1813 | |
| 1814 | for_each_tile(tile, xe, id) |
| 1815 | xe_range_fence_tree_fini(tree: &vm->rftree[id]); |
| 1816 | |
| 1817 | xe_vm_put(vm); |
| 1818 | } |
| 1819 | |
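/*
 * Deferred final teardown of a VM, run from a workqueue so that freeing may
 * sleep. By now the VM must have been closed (vm->size == 0) and all of its
 * page-table roots released.
 */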
| 1820 | static void vm_destroy_work_func(struct work_struct *w) |
| 1821 | { |
| 1822 | struct xe_vm *vm = |
| 1823 | container_of(w, struct xe_vm, destroy_work); |
| 1824 | struct xe_device *xe = vm->xe; |
| 1825 | struct xe_tile *tile; |
| 1826 | u8 id; |
| 1827 | |
	/* If this fires, xe_vm_close_and_put() was never called for this VM */
| 1829 | xe_assert(xe, !vm->size); |
| 1830 | |
| 1831 | if (xe_vm_in_preempt_fence_mode(vm)) |
| 1832 | flush_work(work: &vm->preempt.rebind_work); |
| 1833 | |
| 1834 | mutex_destroy(lock: &vm->snap_mutex); |
| 1835 | |
| 1836 | if (vm->flags & XE_VM_FLAG_LR_MODE) |
| 1837 | xe_pm_runtime_put(xe); |
| 1838 | |
| 1839 | for_each_tile(tile, xe, id) |
| 1840 | XE_WARN_ON(vm->pt_root[id]); |
| 1841 | |
| 1842 | trace_xe_vm_free(vm); |
| 1843 | |
| 1844 | ttm_lru_bulk_move_fini(bdev: &xe->ttm, bulk: &vm->lru_bulk_move); |
| 1845 | |
| 1846 | if (vm->xef) |
| 1847 | xe_file_put(xef: vm->xef); |
| 1848 | |
| 1849 | kfree(objp: vm); |
| 1850 | } |
| 1851 | |
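/*
 * drm_gpuvm free callback, invoked when the last reference to the embedded
 * gpuvm is dropped. The actual teardown is deferred to a worker because
 * this callback may run in contexts that cannot sleep.
 */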
| 1852 | static void xe_vm_free(struct drm_gpuvm *gpuvm) |
| 1853 | { |
| 1854 | struct xe_vm *vm = container_of(gpuvm, struct xe_vm, gpuvm); |
| 1855 | |
| 1856 | /* To destroy the VM we need to be able to sleep */ |
| 1857 | queue_work(wq: system_unbound_wq, work: &vm->destroy_work); |
| 1858 | } |
| 1859 | |
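/**
 * xe_vm_lookup() - Look up a VM by its user-visible ID
 * @xef: The file private the VM belongs to.
 * @id: The VM ID returned by the VM create ioctl.
 *
 * Return: A referenced pointer to the VM, or NULL if no VM with that ID
 * exists for this file. The caller must drop the reference with xe_vm_put().
 */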
| 1860 | struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id) |
| 1861 | { |
| 1862 | struct xe_vm *vm; |
| 1863 | |
| 1864 | mutex_lock(&xef->vm.lock); |
| 1865 | vm = xa_load(&xef->vm.xa, index: id); |
| 1866 | if (vm) |
| 1867 | xe_vm_get(vm); |
| 1868 | mutex_unlock(lock: &xef->vm.lock); |
| 1869 | |
| 1870 | return vm; |
| 1871 | } |
| 1872 | |
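/**
 * xe_vm_pdp4_descriptor() - Page-directory descriptor for a VM on a tile
 * @vm: The VM.
 * @tile: The tile whose page-table root to encode.
 *
 * Return: The PDE encoding of the VM's root page-table BO for @tile.
 */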
| 1873 | u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile) |
| 1874 | { |
| 1875 | return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0); |
| 1876 | } |
| 1877 | |
| 1878 | static struct xe_exec_queue * |
| 1879 | to_wait_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q) |
| 1880 | { |
| 1881 | return q ? q : vm->q[0]; |
| 1882 | } |
| 1883 | |
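/*
 * Return a referenced user fence from the sync array, if one is present.
 * The caller is responsible for dropping the reference.
 */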
| 1884 | static struct xe_user_fence * |
| 1885 | find_ufence_get(struct xe_sync_entry *syncs, u32 num_syncs) |
| 1886 | { |
| 1887 | unsigned int i; |
| 1888 | |
| 1889 | for (i = 0; i < num_syncs; i++) { |
| 1890 | struct xe_sync_entry *e = &syncs[i]; |
| 1891 | |
| 1892 | if (xe_sync_is_ufence(sync: e)) |
| 1893 | return xe_sync_ufence_get(sync: e); |
| 1894 | } |
| 1895 | |
| 1896 | return NULL; |
| 1897 | } |
| 1898 | |
| 1899 | #define ALL_DRM_XE_VM_CREATE_FLAGS (DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE | \ |
| 1900 | DRM_XE_VM_CREATE_FLAG_LR_MODE | \ |
| 1901 | DRM_XE_VM_CREATE_FLAG_FAULT_MODE) |
| 1902 | |
| 1903 | int xe_vm_create_ioctl(struct drm_device *dev, void *data, |
| 1904 | struct drm_file *file) |
| 1905 | { |
| 1906 | struct xe_device *xe = to_xe_device(dev); |
| 1907 | struct xe_file *xef = to_xe_file(file); |
| 1908 | struct drm_xe_vm_create *args = data; |
| 1909 | struct xe_gt *wa_gt = xe_root_mmio_gt(xe); |
| 1910 | struct xe_vm *vm; |
| 1911 | u32 id; |
| 1912 | int err; |
| 1913 | u32 flags = 0; |
| 1914 | |
| 1915 | if (XE_IOCTL_DBG(xe, args->extensions)) |
| 1916 | return -EINVAL; |
| 1917 | |
| 1918 | if (wa_gt && XE_GT_WA(wa_gt, 22014953428)) |
| 1919 | args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE; |
| 1920 | |
| 1921 | if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && |
| 1922 | !xe->info.has_usm)) |
| 1923 | return -EINVAL; |
| 1924 | |
| 1925 | if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) |
| 1926 | return -EINVAL; |
| 1927 | |
| 1928 | if (XE_IOCTL_DBG(xe, args->flags & ~ALL_DRM_XE_VM_CREATE_FLAGS)) |
| 1929 | return -EINVAL; |
| 1930 | |
| 1931 | if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE && |
| 1932 | args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE && |
| 1933 | !xe->info.needs_scratch)) |
| 1934 | return -EINVAL; |
| 1935 | |
| 1936 | if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) && |
| 1937 | args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)) |
| 1938 | return -EINVAL; |
| 1939 | |
| 1940 | if (args->flags & DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE) |
| 1941 | flags |= XE_VM_FLAG_SCRATCH_PAGE; |
| 1942 | if (args->flags & DRM_XE_VM_CREATE_FLAG_LR_MODE) |
| 1943 | flags |= XE_VM_FLAG_LR_MODE; |
| 1944 | if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE) |
| 1945 | flags |= XE_VM_FLAG_FAULT_MODE; |
| 1946 | |
| 1947 | vm = xe_vm_create(xe, flags, xef); |
| 1948 | if (IS_ERR(ptr: vm)) |
| 1949 | return PTR_ERR(ptr: vm); |
| 1950 | |
| 1951 | #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM) |
| 1952 | /* Warning: Security issue - never enable by default */ |
| 1953 | args->reserved[0] = xe_bo_main_addr(bo: vm->pt_root[0]->bo, XE_PAGE_SIZE); |
| 1954 | #endif |
| 1955 | |
| 1956 | /* user id alloc must always be last in ioctl to prevent UAF */ |
| 1957 | err = xa_alloc(xa: &xef->vm.xa, id: &id, entry: vm, xa_limit_32b, GFP_KERNEL); |
| 1958 | if (err) |
| 1959 | goto err_close_and_put; |
| 1960 | |
| 1961 | args->vm_id = id; |
| 1962 | |
| 1963 | return 0; |
| 1964 | |
| 1965 | err_close_and_put: |
| 1966 | xe_vm_close_and_put(vm); |
| 1967 | |
| 1968 | return err; |
| 1969 | } |
| 1970 | |
| 1971 | int xe_vm_destroy_ioctl(struct drm_device *dev, void *data, |
| 1972 | struct drm_file *file) |
| 1973 | { |
| 1974 | struct xe_device *xe = to_xe_device(dev); |
| 1975 | struct xe_file *xef = to_xe_file(file); |
| 1976 | struct drm_xe_vm_destroy *args = data; |
| 1977 | struct xe_vm *vm; |
| 1978 | int err = 0; |
| 1979 | |
| 1980 | if (XE_IOCTL_DBG(xe, args->pad) || |
| 1981 | XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) |
| 1982 | return -EINVAL; |
| 1983 | |
| 1984 | mutex_lock(&xef->vm.lock); |
| 1985 | vm = xa_load(&xef->vm.xa, index: args->vm_id); |
| 1986 | if (XE_IOCTL_DBG(xe, !vm)) |
| 1987 | err = -ENOENT; |
| 1988 | else if (XE_IOCTL_DBG(xe, vm->preempt.num_exec_queues)) |
| 1989 | err = -EBUSY; |
| 1990 | else |
| 1991 | xa_erase(&xef->vm.xa, index: args->vm_id); |
| 1992 | mutex_unlock(lock: &xef->vm.lock); |
| 1993 | |
| 1994 | if (!err) |
| 1995 | xe_vm_close_and_put(vm); |
| 1996 | |
| 1997 | return err; |
| 1998 | } |
| 1999 | |
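/* Count the VMAs overlapping the given address range; vm->lock must be held. */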
| 2000 | static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end) |
| 2001 | { |
| 2002 | struct drm_gpuva *gpuva; |
| 2003 | u32 num_vmas = 0; |
| 2004 | |
| 2005 | lockdep_assert_held(&vm->lock); |
| 2006 | drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) |
| 2007 | num_vmas++; |
| 2008 | |
| 2009 | return num_vmas; |
| 2010 | } |
| 2011 | |
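/*
 * Fill @attrs with the memory attributes of each VMA overlapping the given
 * range. On success *num_vmas is updated to the number of entries written;
 * -ENOSPC is returned if the caller-provided array is too small.
 */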
| 2012 | static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start, |
| 2013 | u64 end, struct drm_xe_mem_range_attr *attrs) |
| 2014 | { |
| 2015 | struct drm_gpuva *gpuva; |
| 2016 | int i = 0; |
| 2017 | |
| 2018 | lockdep_assert_held(&vm->lock); |
| 2019 | |
| 2020 | drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) { |
| 2021 | struct xe_vma *vma = gpuva_to_vma(gpuva); |
| 2022 | |
| 2023 | if (i == *num_vmas) |
| 2024 | return -ENOSPC; |
| 2025 | |
| 2026 | attrs[i].start = xe_vma_start(vma); |
| 2027 | attrs[i].end = xe_vma_end(vma); |
| 2028 | attrs[i].atomic.val = vma->attr.atomic_access; |
| 2029 | attrs[i].pat_index.val = vma->attr.pat_index; |
| 2030 | attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd; |
| 2031 | attrs[i].preferred_mem_loc.migration_policy = |
| 2032 | vma->attr.preferred_loc.migration_policy; |
| 2033 | |
| 2034 | i++; |
| 2035 | } |
| 2036 | |
| 2037 | *num_vmas = i; |
| 2038 | return 0; |
| 2039 | } |
| 2040 | |
| 2041 | int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file) |
| 2042 | { |
| 2043 | struct xe_device *xe = to_xe_device(dev); |
| 2044 | struct xe_file *xef = to_xe_file(file); |
| 2045 | struct drm_xe_mem_range_attr *mem_attrs; |
| 2046 | struct drm_xe_vm_query_mem_range_attr *args = data; |
| 2047 | u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); |
| 2048 | struct xe_vm *vm; |
| 2049 | int err = 0; |
| 2050 | |
| 2051 | if (XE_IOCTL_DBG(xe, |
| 2052 | ((args->num_mem_ranges == 0 && |
| 2053 | (attrs_user || args->sizeof_mem_range_attr != 0)) || |
| 2054 | (args->num_mem_ranges > 0 && |
| 2055 | (!attrs_user || |
| 2056 | args->sizeof_mem_range_attr != |
| 2057 | sizeof(struct drm_xe_mem_range_attr)))))) |
| 2058 | return -EINVAL; |
| 2059 | |
| 2060 | vm = xe_vm_lookup(xef, id: args->vm_id); |
| 2061 | if (XE_IOCTL_DBG(xe, !vm)) |
| 2062 | return -EINVAL; |
| 2063 | |
| 2064 | err = down_read_interruptible(sem: &vm->lock); |
| 2065 | if (err) |
| 2066 | goto put_vm; |
| 2067 | |
| 2068 | attrs_user = u64_to_user_ptr(args->vector_of_mem_attr); |
| 2069 | |
| 2070 | if (args->num_mem_ranges == 0 && !attrs_user) { |
| 2071 | args->num_mem_ranges = xe_vm_query_vmas(vm, start: args->start, end: args->start + args->range); |
| 2072 | args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr); |
| 2073 | goto unlock_vm; |
| 2074 | } |
| 2075 | |
| 2076 | mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr, |
| 2077 | GFP_KERNEL | __GFP_ACCOUNT | |
| 2078 | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
| 2079 | if (!mem_attrs) { |
| 2080 | err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM; |
| 2081 | goto unlock_vm; |
| 2082 | } |
| 2083 | |
| 2084 | memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr); |
| 2085 | err = get_mem_attrs(vm, num_vmas: &args->num_mem_ranges, start: args->start, |
| 2086 | end: args->start + args->range, attrs: mem_attrs); |
| 2087 | if (err) |
| 2088 | goto free_mem_attrs; |
| 2089 | |
| 2090 | err = copy_to_user(to: attrs_user, from: mem_attrs, |
| 2091 | n: args->sizeof_mem_range_attr * args->num_mem_ranges); |
| 2092 | if (err) |
| 2093 | err = -EFAULT; |
| 2094 | |
| 2095 | free_mem_attrs: |
| 2096 | kvfree(addr: mem_attrs); |
| 2097 | unlock_vm: |
| 2098 | up_read(sem: &vm->lock); |
| 2099 | put_vm: |
| 2100 | xe_vm_put(vm); |
| 2101 | return err; |
| 2102 | } |
| 2103 | |
| 2104 | static bool vma_matches(struct xe_vma *vma, u64 page_addr) |
| 2105 | { |
| 2106 | if (page_addr > xe_vma_end(vma) - 1 || |
| 2107 | page_addr + SZ_4K - 1 < xe_vma_start(vma)) |
| 2108 | return false; |
| 2109 | |
| 2110 | return true; |
| 2111 | } |
| 2112 | |
| 2113 | /** |
| 2114 | * xe_vm_find_vma_by_addr() - Find a VMA by its address |
| 2115 | * |
| 2116 | * @vm: the xe_vm the vma belongs to |
| 2117 | * @page_addr: address to look up |
| 2118 | */ |
| 2119 | struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr) |
| 2120 | { |
| 2121 | struct xe_vma *vma = NULL; |
| 2122 | |
| 2123 | if (vm->usm.last_fault_vma) { /* Fast lookup */ |
| 2124 | if (vma_matches(vma: vm->usm.last_fault_vma, page_addr)) |
| 2125 | vma = vm->usm.last_fault_vma; |
| 2126 | } |
| 2127 | if (!vma) |
| 2128 | vma = xe_vm_find_overlapping_vma(vm, start: page_addr, SZ_4K); |
| 2129 | |
| 2130 | return vma; |
| 2131 | } |
| 2132 | |
| 2133 | static const u32 region_to_mem_type[] = { |
| 2134 | XE_PL_TT, |
| 2135 | XE_PL_VRAM0, |
| 2136 | XE_PL_VRAM1, |
| 2137 | }; |
| 2138 | |
| 2139 | static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma, |
| 2140 | bool post_commit) |
| 2141 | { |
| 2142 | xe_svm_notifier_lock(vm); |
| 2143 | vma->gpuva.flags |= XE_VMA_DESTROYED; |
| 2144 | xe_svm_notifier_unlock(vm); |
| 2145 | if (post_commit) |
| 2146 | xe_vm_remove_vma(vm, vma); |
| 2147 | } |
| 2148 | |
| 2149 | #undef ULL |
| 2150 | #define ULL unsigned long long |
| 2151 | |
| 2152 | #if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM) |
| 2153 | static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) |
| 2154 | { |
| 2155 | struct xe_vma *vma; |
| 2156 | |
| 2157 | switch (op->op) { |
| 2158 | case DRM_GPUVA_OP_MAP: |
| 2159 | vm_dbg(&xe->drm, "MAP: addr=0x%016llx, range=0x%016llx" , |
| 2160 | (ULL)op->map.va.addr, (ULL)op->map.va.range); |
| 2161 | break; |
| 2162 | case DRM_GPUVA_OP_REMAP: |
| 2163 | vma = gpuva_to_vma(gpuva: op->remap.unmap->va); |
| 2164 | vm_dbg(&xe->drm, "REMAP:UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d" , |
| 2165 | (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), |
| 2166 | op->remap.unmap->keep ? 1 : 0); |
| 2167 | if (op->remap.prev) |
| 2168 | vm_dbg(&xe->drm, |
| 2169 | "REMAP:PREV: addr=0x%016llx, range=0x%016llx" , |
| 2170 | (ULL)op->remap.prev->va.addr, |
| 2171 | (ULL)op->remap.prev->va.range); |
| 2172 | if (op->remap.next) |
| 2173 | vm_dbg(&xe->drm, |
| 2174 | "REMAP:NEXT: addr=0x%016llx, range=0x%016llx" , |
| 2175 | (ULL)op->remap.next->va.addr, |
| 2176 | (ULL)op->remap.next->va.range); |
| 2177 | break; |
| 2178 | case DRM_GPUVA_OP_UNMAP: |
| 2179 | vma = gpuva_to_vma(gpuva: op->unmap.va); |
| 2180 | vm_dbg(&xe->drm, "UNMAP: addr=0x%016llx, range=0x%016llx, keep=%d" , |
| 2181 | (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma), |
| 2182 | op->unmap.keep ? 1 : 0); |
| 2183 | break; |
| 2184 | case DRM_GPUVA_OP_PREFETCH: |
| 2185 | vma = gpuva_to_vma(gpuva: op->prefetch.va); |
| 2186 | vm_dbg(&xe->drm, "PREFETCH: addr=0x%016llx, range=0x%016llx" , |
| 2187 | (ULL)xe_vma_start(vma), (ULL)xe_vma_size(vma)); |
| 2188 | break; |
| 2189 | default: |
		drm_warn(&xe->drm, "NOT POSSIBLE");
| 2191 | } |
| 2192 | } |
| 2193 | #else |
| 2194 | static void print_op(struct xe_device *xe, struct drm_gpuva_op *op) |
| 2195 | { |
| 2196 | } |
| 2197 | #endif |
| 2198 | |
| 2199 | static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags) |
| 2200 | { |
| 2201 | if (!xe_vm_in_fault_mode(vm)) |
| 2202 | return false; |
| 2203 | |
| 2204 | if (!xe_vm_has_scratch(vm)) |
| 2205 | return false; |
| 2206 | |
| 2207 | if (bind_flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE) |
| 2208 | return false; |
| 2209 | |
| 2210 | return true; |
| 2211 | } |
| 2212 | |
| 2213 | static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops) |
| 2214 | { |
| 2215 | struct drm_gpuva_op *__op; |
| 2216 | |
| 2217 | drm_gpuva_for_each_op(__op, ops) { |
| 2218 | struct xe_vma_op *op = gpuva_op_to_vma_op(op: __op); |
| 2219 | |
| 2220 | xe_vma_svm_prefetch_op_fini(op); |
| 2221 | } |
| 2222 | } |
| 2223 | |
| 2224 | /* |
| 2225 | * Create operations list from IOCTL arguments, setup operations fields so parse |
| 2226 | * and commit steps are decoupled from IOCTL arguments. This step can fail. |
| 2227 | */ |
| 2228 | static struct drm_gpuva_ops * |
| 2229 | vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops, |
| 2230 | struct xe_bo *bo, u64 bo_offset_or_userptr, |
| 2231 | u64 addr, u64 range, |
| 2232 | u32 operation, u32 flags, |
| 2233 | u32 prefetch_region, u16 pat_index) |
| 2234 | { |
| 2235 | struct drm_gem_object *obj = bo ? &bo->ttm.base : NULL; |
| 2236 | struct drm_gpuva_ops *ops; |
| 2237 | struct drm_gpuva_op *__op; |
| 2238 | struct drm_gpuvm_bo *vm_bo; |
| 2239 | u64 range_end = addr + range; |
| 2240 | int err; |
| 2241 | |
| 2242 | lockdep_assert_held_write(&vm->lock); |
| 2243 | |
| 2244 | vm_dbg(&vm->xe->drm, |
| 2245 | "op=%d, addr=0x%016llx, range=0x%016llx, bo_offset_or_userptr=0x%016llx" , |
| 2246 | operation, (ULL)addr, (ULL)range, |
| 2247 | (ULL)bo_offset_or_userptr); |
| 2248 | |
| 2249 | switch (operation) { |
| 2250 | case DRM_XE_VM_BIND_OP_MAP: |
| 2251 | case DRM_XE_VM_BIND_OP_MAP_USERPTR: { |
| 2252 | struct drm_gpuvm_map_req map_req = { |
| 2253 | .map.va.addr = addr, |
| 2254 | .map.va.range = range, |
| 2255 | .map.gem.obj = obj, |
| 2256 | .map.gem.offset = bo_offset_or_userptr, |
| 2257 | }; |
| 2258 | |
| 2259 | ops = drm_gpuvm_sm_map_ops_create(gpuvm: &vm->gpuvm, req: &map_req); |
| 2260 | break; |
| 2261 | } |
| 2262 | case DRM_XE_VM_BIND_OP_UNMAP: |
| 2263 | ops = drm_gpuvm_sm_unmap_ops_create(gpuvm: &vm->gpuvm, addr, range); |
| 2264 | break; |
| 2265 | case DRM_XE_VM_BIND_OP_PREFETCH: |
| 2266 | ops = drm_gpuvm_prefetch_ops_create(gpuvm: &vm->gpuvm, addr, range); |
| 2267 | break; |
| 2268 | case DRM_XE_VM_BIND_OP_UNMAP_ALL: |
| 2269 | xe_assert(vm->xe, bo); |
| 2270 | |
| 2271 | err = xe_bo_lock(bo, intr: true); |
| 2272 | if (err) |
| 2273 | return ERR_PTR(error: err); |
| 2274 | |
| 2275 | vm_bo = drm_gpuvm_bo_obtain(gpuvm: &vm->gpuvm, obj); |
| 2276 | if (IS_ERR(ptr: vm_bo)) { |
| 2277 | xe_bo_unlock(bo); |
| 2278 | return ERR_CAST(ptr: vm_bo); |
| 2279 | } |
| 2280 | |
| 2281 | ops = drm_gpuvm_bo_unmap_ops_create(vm_bo); |
| 2282 | drm_gpuvm_bo_put(vm_bo); |
| 2283 | xe_bo_unlock(bo); |
| 2284 | break; |
| 2285 | default: |
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
| 2287 | ops = ERR_PTR(error: -EINVAL); |
| 2288 | } |
| 2289 | if (IS_ERR(ptr: ops)) |
| 2290 | return ops; |
| 2291 | |
| 2292 | drm_gpuva_for_each_op(__op, ops) { |
| 2293 | struct xe_vma_op *op = gpuva_op_to_vma_op(op: __op); |
| 2294 | |
| 2295 | if (__op->op == DRM_GPUVA_OP_MAP) { |
| 2296 | op->map.immediate = |
| 2297 | flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE; |
| 2298 | if (flags & DRM_XE_VM_BIND_FLAG_READONLY) |
| 2299 | op->map.vma_flags |= XE_VMA_READ_ONLY; |
| 2300 | if (flags & DRM_XE_VM_BIND_FLAG_NULL) |
| 2301 | op->map.vma_flags |= DRM_GPUVA_SPARSE; |
| 2302 | if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR) |
| 2303 | op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR; |
| 2304 | if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE) |
| 2305 | op->map.vma_flags |= XE_VMA_DUMPABLE; |
| 2306 | if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) |
| 2307 | op->map.vma_flags |= XE_VMA_MADV_AUTORESET; |
| 2308 | op->map.pat_index = pat_index; |
| 2309 | op->map.invalidate_on_bind = |
| 2310 | __xe_vm_needs_clear_scratch_pages(vm, bind_flags: flags); |
| 2311 | } else if (__op->op == DRM_GPUVA_OP_PREFETCH) { |
| 2312 | struct xe_vma *vma = gpuva_to_vma(gpuva: op->base.prefetch.va); |
| 2313 | struct xe_tile *tile; |
| 2314 | struct xe_svm_range *svm_range; |
| 2315 | struct drm_gpusvm_ctx ctx = {}; |
| 2316 | struct drm_pagemap *dpagemap; |
| 2317 | u8 id, tile_mask = 0; |
| 2318 | u32 i; |
| 2319 | |
| 2320 | if (!xe_vma_is_cpu_addr_mirror(vma)) { |
| 2321 | op->prefetch.region = prefetch_region; |
| 2322 | break; |
| 2323 | } |
| 2324 | |
| 2325 | ctx.read_only = xe_vma_read_only(vma); |
| 2326 | ctx.devmem_possible = IS_DGFX(vm->xe) && |
| 2327 | IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); |
| 2328 | |
| 2329 | for_each_tile(tile, vm->xe, id) |
| 2330 | tile_mask |= 0x1 << id; |
| 2331 | |
| 2332 | xa_init_flags(xa: &op->prefetch_range.range, XA_FLAGS_ALLOC); |
| 2333 | op->prefetch_range.ranges_count = 0; |
| 2334 | tile = NULL; |
| 2335 | |
| 2336 | if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) { |
| 2337 | dpagemap = xe_vma_resolve_pagemap(vma, |
| 2338 | tile: xe_device_get_root_tile(xe: vm->xe)); |
| 2339 | /* |
| 2340 | * TODO: Once multigpu support is enabled will need |
| 2341 | * something to dereference tile from dpagemap. |
| 2342 | */ |
| 2343 | if (dpagemap) |
| 2344 | tile = xe_device_get_root_tile(xe: vm->xe); |
| 2345 | } else if (prefetch_region) { |
| 2346 | tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] - |
| 2347 | XE_PL_VRAM0]; |
| 2348 | } |
| 2349 | |
| 2350 | op->prefetch_range.tile = tile; |
| 2351 | alloc_next_range: |
| 2352 | svm_range = xe_svm_range_find_or_insert(vm, addr, vma, ctx: &ctx); |
| 2353 | |
| 2354 | if (PTR_ERR(ptr: svm_range) == -ENOENT) { |
| 2355 | u64 ret = xe_svm_find_vma_start(vm, addr, end: range_end, vma); |
| 2356 | |
| 2357 | addr = ret == ULONG_MAX ? 0 : ret; |
| 2358 | if (addr) |
| 2359 | goto alloc_next_range; |
| 2360 | else |
| 2361 | goto print_op_label; |
| 2362 | } |
| 2363 | |
| 2364 | if (IS_ERR(ptr: svm_range)) { |
| 2365 | err = PTR_ERR(ptr: svm_range); |
| 2366 | goto unwind_prefetch_ops; |
| 2367 | } |
| 2368 | |
| 2369 | if (xe_svm_range_validate(vm, range: svm_range, tile_mask, devmem_preferred: !!tile)) { |
| 2370 | xe_svm_range_debug(range: svm_range, operation: "PREFETCH - RANGE IS VALID" ); |
| 2371 | goto check_next_range; |
| 2372 | } |
| 2373 | |
| 2374 | err = xa_alloc(xa: &op->prefetch_range.range, |
| 2375 | id: &i, entry: svm_range, xa_limit_32b, |
| 2376 | GFP_KERNEL); |
| 2377 | |
| 2378 | if (err) |
| 2379 | goto unwind_prefetch_ops; |
| 2380 | |
| 2381 | op->prefetch_range.ranges_count++; |
| 2382 | vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH; |
| 2383 | xe_svm_range_debug(range: svm_range, operation: "PREFETCH - RANGE CREATED" ); |
| 2384 | check_next_range: |
| 2385 | if (range_end > xe_svm_range_end(range: svm_range) && |
| 2386 | xe_svm_range_end(range: svm_range) < xe_vma_end(vma)) { |
| 2387 | addr = xe_svm_range_end(range: svm_range); |
| 2388 | goto alloc_next_range; |
| 2389 | } |
| 2390 | } |
| 2391 | print_op_label: |
| 2392 | print_op(xe: vm->xe, op: __op); |
| 2393 | } |
| 2394 | |
| 2395 | return ops; |
| 2396 | |
| 2397 | unwind_prefetch_ops: |
| 2398 | xe_svm_prefetch_gpuva_ops_fini(ops); |
| 2399 | drm_gpuva_ops_free(gpuvm: &vm->gpuvm, ops); |
| 2400 | return ERR_PTR(error: err); |
| 2401 | } |
| 2402 | |
| 2403 | ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO); |
| 2404 | |
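/*
 * Create a VMA for a MAP operation. BO-backed mappings are created under a
 * validation transaction that locks the VM and BO reservations as needed and
 * adds the VM's preempt fences to externally shared BOs. Userptr mappings
 * have their pages pinned right away.
 */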
| 2405 | static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op, |
| 2406 | struct xe_vma_mem_attr *attr, unsigned int flags) |
| 2407 | { |
| 2408 | struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(obj: op->gem.obj) : NULL; |
| 2409 | struct xe_validation_ctx ctx; |
| 2410 | struct drm_exec exec; |
| 2411 | struct xe_vma *vma; |
| 2412 | int err = 0; |
| 2413 | |
| 2414 | lockdep_assert_held_write(&vm->lock); |
| 2415 | |
| 2416 | if (bo) { |
| 2417 | err = 0; |
| 2418 | xe_validation_guard(&ctx, &vm->xe->val, &exec, |
| 2419 | (struct xe_val_flags) {.interruptible = true}, err) { |
| 2420 | if (!bo->vm) { |
| 2421 | err = drm_exec_lock_obj(exec: &exec, obj: xe_vm_obj(vm)); |
| 2422 | drm_exec_retry_on_contention(&exec); |
| 2423 | } |
| 2424 | if (!err) { |
| 2425 | err = drm_exec_lock_obj(exec: &exec, obj: &bo->ttm.base); |
| 2426 | drm_exec_retry_on_contention(&exec); |
| 2427 | } |
| 2428 | if (err) |
| 2429 | return ERR_PTR(error: err); |
| 2430 | |
| 2431 | vma = xe_vma_create(vm, bo, bo_offset_or_userptr: op->gem.offset, |
| 2432 | start: op->va.addr, end: op->va.addr + |
| 2433 | op->va.range - 1, attr, flags); |
| 2434 | if (IS_ERR(ptr: vma)) |
| 2435 | return vma; |
| 2436 | |
| 2437 | if (!bo->vm) { |
| 2438 | err = add_preempt_fences(vm, bo); |
| 2439 | if (err) { |
| 2440 | prep_vma_destroy(vm, vma, post_commit: false); |
| 2441 | xe_vma_destroy(vma, NULL); |
| 2442 | } |
| 2443 | } |
| 2444 | } |
| 2445 | if (err) |
| 2446 | return ERR_PTR(error: err); |
| 2447 | } else { |
| 2448 | vma = xe_vma_create(vm, NULL, bo_offset_or_userptr: op->gem.offset, |
| 2449 | start: op->va.addr, end: op->va.addr + |
| 2450 | op->va.range - 1, attr, flags); |
| 2451 | if (IS_ERR(ptr: vma)) |
| 2452 | return vma; |
| 2453 | |
| 2454 | if (xe_vma_is_userptr(vma)) |
| 2455 | err = xe_vma_userptr_pin_pages(uvma: to_userptr_vma(vma)); |
| 2456 | } |
| 2457 | if (err) { |
| 2458 | prep_vma_destroy(vm, vma, post_commit: false); |
| 2459 | xe_vma_destroy_unlocked(vma); |
| 2460 | vma = ERR_PTR(error: err); |
| 2461 | } |
| 2462 | |
| 2463 | return vma; |
| 2464 | } |
| 2465 | |
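/*
 * Largest page-table entry size previously used for this VMA, derived from
 * the XE_VMA_PTE_* flags. Defaults to 1G when no flag has been set yet.
 */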
| 2466 | static u64 xe_vma_max_pte_size(struct xe_vma *vma) |
| 2467 | { |
| 2468 | if (vma->gpuva.flags & XE_VMA_PTE_1G) |
| 2469 | return SZ_1G; |
| 2470 | else if (vma->gpuva.flags & (XE_VMA_PTE_2M | XE_VMA_PTE_COMPACT)) |
| 2471 | return SZ_2M; |
| 2472 | else if (vma->gpuva.flags & XE_VMA_PTE_64K) |
| 2473 | return SZ_64K; |
| 2474 | else if (vma->gpuva.flags & XE_VMA_PTE_4K) |
| 2475 | return SZ_4K; |
| 2476 | |
	return SZ_1G;	/* Uninitialized, use max size */
| 2478 | } |
| 2479 | |
| 2480 | static void xe_vma_set_pte_size(struct xe_vma *vma, u64 size) |
| 2481 | { |
| 2482 | switch (size) { |
| 2483 | case SZ_1G: |
| 2484 | vma->gpuva.flags |= XE_VMA_PTE_1G; |
| 2485 | break; |
| 2486 | case SZ_2M: |
| 2487 | vma->gpuva.flags |= XE_VMA_PTE_2M; |
| 2488 | break; |
| 2489 | case SZ_64K: |
| 2490 | vma->gpuva.flags |= XE_VMA_PTE_64K; |
| 2491 | break; |
| 2492 | case SZ_4K: |
| 2493 | vma->gpuva.flags |= XE_VMA_PTE_4K; |
| 2494 | break; |
| 2495 | } |
| 2496 | } |
| 2497 | |
| 2498 | static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op) |
| 2499 | { |
| 2500 | int err = 0; |
| 2501 | |
| 2502 | lockdep_assert_held_write(&vm->lock); |
| 2503 | |
| 2504 | switch (op->base.op) { |
| 2505 | case DRM_GPUVA_OP_MAP: |
| 2506 | err |= xe_vm_insert_vma(vm, vma: op->map.vma); |
| 2507 | if (!err) |
| 2508 | op->flags |= XE_VMA_OP_COMMITTED; |
| 2509 | break; |
| 2510 | case DRM_GPUVA_OP_REMAP: |
| 2511 | { |
| 2512 | u8 tile_present = |
| 2513 | gpuva_to_vma(gpuva: op->base.remap.unmap->va)->tile_present; |
| 2514 | |
| 2515 | prep_vma_destroy(vm, vma: gpuva_to_vma(gpuva: op->base.remap.unmap->va), |
| 2516 | post_commit: true); |
| 2517 | op->flags |= XE_VMA_OP_COMMITTED; |
| 2518 | |
| 2519 | if (op->remap.prev) { |
| 2520 | err |= xe_vm_insert_vma(vm, vma: op->remap.prev); |
| 2521 | if (!err) |
| 2522 | op->flags |= XE_VMA_OP_PREV_COMMITTED; |
| 2523 | if (!err && op->remap.skip_prev) { |
| 2524 | op->remap.prev->tile_present = |
| 2525 | tile_present; |
| 2526 | op->remap.prev = NULL; |
| 2527 | } |
| 2528 | } |
| 2529 | if (op->remap.next) { |
| 2530 | err |= xe_vm_insert_vma(vm, vma: op->remap.next); |
| 2531 | if (!err) |
| 2532 | op->flags |= XE_VMA_OP_NEXT_COMMITTED; |
| 2533 | if (!err && op->remap.skip_next) { |
| 2534 | op->remap.next->tile_present = |
| 2535 | tile_present; |
| 2536 | op->remap.next = NULL; |
| 2537 | } |
| 2538 | } |
| 2539 | |
| 2540 | /* Adjust for partial unbind after removing VMA from VM */ |
| 2541 | if (!err) { |
| 2542 | op->base.remap.unmap->va->va.addr = op->remap.start; |
| 2543 | op->base.remap.unmap->va->va.range = op->remap.range; |
| 2544 | } |
| 2545 | break; |
| 2546 | } |
| 2547 | case DRM_GPUVA_OP_UNMAP: |
| 2548 | prep_vma_destroy(vm, vma: gpuva_to_vma(gpuva: op->base.unmap.va), post_commit: true); |
| 2549 | op->flags |= XE_VMA_OP_COMMITTED; |
| 2550 | break; |
| 2551 | case DRM_GPUVA_OP_PREFETCH: |
| 2552 | op->flags |= XE_VMA_OP_COMMITTED; |
| 2553 | break; |
| 2554 | default: |
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
| 2556 | } |
| 2557 | |
| 2558 | return err; |
| 2559 | } |
| 2560 | |
| 2561 | /** |
| 2562 | * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes |
| 2563 | * @vma: Pointer to the xe_vma structure to check |
| 2564 | * |
| 2565 | * This function determines whether the given VMA (Virtual Memory Area) |
| 2566 | * has its memory attributes set to their default values. Specifically, |
| 2567 | * it checks the following conditions: |
| 2568 | * |
 * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
| 2570 | * - `pat_index` is equal to `default_pat_index` |
| 2571 | * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE` |
| 2572 | * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES` |
| 2573 | * |
| 2574 | * Return: true if all attributes are at their default values, false otherwise. |
| 2575 | */ |
| 2576 | bool xe_vma_has_default_mem_attrs(struct xe_vma *vma) |
| 2577 | { |
| 2578 | return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED && |
| 2579 | vma->attr.pat_index == vma->attr.default_pat_index && |
| 2580 | vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE && |
| 2581 | vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES); |
| 2582 | } |
| 2583 | |
| 2584 | static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops, |
| 2585 | struct xe_vma_ops *vops) |
| 2586 | { |
| 2587 | struct xe_device *xe = vm->xe; |
| 2588 | struct drm_gpuva_op *__op; |
| 2589 | struct xe_tile *tile; |
| 2590 | u8 id, tile_mask = 0; |
| 2591 | int err = 0; |
| 2592 | |
| 2593 | lockdep_assert_held_write(&vm->lock); |
| 2594 | |
| 2595 | for_each_tile(tile, vm->xe, id) |
| 2596 | tile_mask |= 0x1 << id; |
| 2597 | |
| 2598 | drm_gpuva_for_each_op(__op, ops) { |
| 2599 | struct xe_vma_op *op = gpuva_op_to_vma_op(op: __op); |
| 2600 | struct xe_vma *vma; |
| 2601 | unsigned int flags = 0; |
| 2602 | |
| 2603 | INIT_LIST_HEAD(list: &op->link); |
| 2604 | list_add_tail(new: &op->link, head: &vops->list); |
| 2605 | op->tile_mask = tile_mask; |
| 2606 | |
| 2607 | switch (op->base.op) { |
| 2608 | case DRM_GPUVA_OP_MAP: |
| 2609 | { |
| 2610 | struct xe_vma_mem_attr default_attr = { |
| 2611 | .preferred_loc = { |
| 2612 | .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE, |
| 2613 | .migration_policy = DRM_XE_MIGRATE_ALL_PAGES, |
| 2614 | }, |
| 2615 | .atomic_access = DRM_XE_ATOMIC_UNDEFINED, |
| 2616 | .default_pat_index = op->map.pat_index, |
| 2617 | .pat_index = op->map.pat_index, |
| 2618 | }; |
| 2619 | |
| 2620 | flags |= op->map.vma_flags & XE_VMA_CREATE_MASK; |
| 2621 | |
| 2622 | vma = new_vma(vm, op: &op->base.map, attr: &default_attr, |
| 2623 | flags); |
| 2624 | if (IS_ERR(ptr: vma)) |
| 2625 | return PTR_ERR(ptr: vma); |
| 2626 | |
| 2627 | op->map.vma = vma; |
| 2628 | if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) && |
| 2629 | !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) || |
| 2630 | op->map.invalidate_on_bind) |
| 2631 | xe_vma_ops_incr_pt_update_ops(vops, |
| 2632 | tile_mask: op->tile_mask, inc_val: 1); |
| 2633 | break; |
| 2634 | } |
| 2635 | case DRM_GPUVA_OP_REMAP: |
| 2636 | { |
| 2637 | struct xe_vma *old = |
| 2638 | gpuva_to_vma(gpuva: op->base.remap.unmap->va); |
| 2639 | bool skip = xe_vma_is_cpu_addr_mirror(vma: old); |
| 2640 | u64 start = xe_vma_start(vma: old), end = xe_vma_end(vma: old); |
| 2641 | int num_remap_ops = 0; |
| 2642 | |
| 2643 | if (op->base.remap.prev) |
| 2644 | start = op->base.remap.prev->va.addr + |
| 2645 | op->base.remap.prev->va.range; |
| 2646 | if (op->base.remap.next) |
| 2647 | end = op->base.remap.next->va.addr; |
| 2648 | |
| 2649 | if (xe_vma_is_cpu_addr_mirror(vma: old) && |
| 2650 | xe_svm_has_mapping(vm, start, end)) { |
| 2651 | if (vops->flags & XE_VMA_OPS_FLAG_MADVISE) |
| 2652 | xe_svm_unmap_address_range(vm, start, end); |
| 2653 | else |
| 2654 | return -EBUSY; |
| 2655 | } |
| 2656 | |
| 2657 | op->remap.start = xe_vma_start(vma: old); |
| 2658 | op->remap.range = xe_vma_size(vma: old); |
| 2659 | |
| 2660 | flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK; |
| 2661 | if (op->base.remap.prev) { |
| 2662 | vma = new_vma(vm, op: op->base.remap.prev, |
| 2663 | attr: &old->attr, flags); |
| 2664 | if (IS_ERR(ptr: vma)) |
| 2665 | return PTR_ERR(ptr: vma); |
| 2666 | |
| 2667 | op->remap.prev = vma; |
| 2668 | |
| 2669 | /* |
| 2670 | * Userptr creates a new SG mapping so |
| 2671 | * we must also rebind. |
| 2672 | */ |
| 2673 | op->remap.skip_prev = skip || |
| 2674 | (!xe_vma_is_userptr(vma: old) && |
| 2675 | IS_ALIGNED(xe_vma_end(vma), |
| 2676 | xe_vma_max_pte_size(old))); |
| 2677 | if (op->remap.skip_prev) { |
| 2678 | xe_vma_set_pte_size(vma, size: xe_vma_max_pte_size(vma: old)); |
| 2679 | op->remap.range -= |
| 2680 | xe_vma_end(vma) - |
| 2681 | xe_vma_start(vma: old); |
| 2682 | op->remap.start = xe_vma_end(vma); |
| 2683 | vm_dbg(&xe->drm, "REMAP:SKIP_PREV: addr=0x%016llx, range=0x%016llx" , |
| 2684 | (ULL)op->remap.start, |
| 2685 | (ULL)op->remap.range); |
| 2686 | } else { |
| 2687 | num_remap_ops++; |
| 2688 | } |
| 2689 | } |
| 2690 | |
| 2691 | if (op->base.remap.next) { |
| 2692 | vma = new_vma(vm, op: op->base.remap.next, |
| 2693 | attr: &old->attr, flags); |
| 2694 | if (IS_ERR(ptr: vma)) |
| 2695 | return PTR_ERR(ptr: vma); |
| 2696 | |
| 2697 | op->remap.next = vma; |
| 2698 | |
| 2699 | /* |
| 2700 | * Userptr creates a new SG mapping so |
| 2701 | * we must also rebind. |
| 2702 | */ |
| 2703 | op->remap.skip_next = skip || |
| 2704 | (!xe_vma_is_userptr(vma: old) && |
| 2705 | IS_ALIGNED(xe_vma_start(vma), |
| 2706 | xe_vma_max_pte_size(old))); |
| 2707 | if (op->remap.skip_next) { |
| 2708 | xe_vma_set_pte_size(vma, size: xe_vma_max_pte_size(vma: old)); |
| 2709 | op->remap.range -= |
| 2710 | xe_vma_end(vma: old) - |
| 2711 | xe_vma_start(vma); |
| 2712 | vm_dbg(&xe->drm, "REMAP:SKIP_NEXT: addr=0x%016llx, range=0x%016llx" , |
| 2713 | (ULL)op->remap.start, |
| 2714 | (ULL)op->remap.range); |
| 2715 | } else { |
| 2716 | num_remap_ops++; |
| 2717 | } |
| 2718 | } |
| 2719 | if (!skip) |
| 2720 | num_remap_ops++; |
| 2721 | |
| 2722 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask: op->tile_mask, inc_val: num_remap_ops); |
| 2723 | break; |
| 2724 | } |
| 2725 | case DRM_GPUVA_OP_UNMAP: |
| 2726 | vma = gpuva_to_vma(gpuva: op->base.unmap.va); |
| 2727 | |
| 2728 | if (xe_vma_is_cpu_addr_mirror(vma) && |
| 2729 | xe_svm_has_mapping(vm, start: xe_vma_start(vma), |
| 2730 | end: xe_vma_end(vma))) |
| 2731 | return -EBUSY; |
| 2732 | |
| 2733 | if (!xe_vma_is_cpu_addr_mirror(vma)) |
| 2734 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask: op->tile_mask, inc_val: 1); |
| 2735 | break; |
| 2736 | case DRM_GPUVA_OP_PREFETCH: |
| 2737 | vma = gpuva_to_vma(gpuva: op->base.prefetch.va); |
| 2738 | |
| 2739 | if (xe_vma_is_userptr(vma)) { |
| 2740 | err = xe_vma_userptr_pin_pages(uvma: to_userptr_vma(vma)); |
| 2741 | if (err) |
| 2742 | return err; |
| 2743 | } |
| 2744 | |
| 2745 | if (xe_vma_is_cpu_addr_mirror(vma)) |
| 2746 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask: op->tile_mask, |
| 2747 | inc_val: op->prefetch_range.ranges_count); |
| 2748 | else |
| 2749 | xe_vma_ops_incr_pt_update_ops(vops, tile_mask: op->tile_mask, inc_val: 1); |
| 2750 | |
| 2751 | break; |
| 2752 | default: |
			drm_warn(&vm->xe->drm, "NOT POSSIBLE");
| 2754 | } |
| 2755 | |
| 2756 | err = xe_vma_op_commit(vm, op); |
| 2757 | if (err) |
| 2758 | return err; |
| 2759 | } |
| 2760 | |
| 2761 | return 0; |
| 2762 | } |
| 2763 | |
| 2764 | static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op, |
| 2765 | bool post_commit, bool prev_post_commit, |
| 2766 | bool next_post_commit) |
| 2767 | { |
| 2768 | lockdep_assert_held_write(&vm->lock); |
| 2769 | |
| 2770 | switch (op->base.op) { |
| 2771 | case DRM_GPUVA_OP_MAP: |
| 2772 | if (op->map.vma) { |
| 2773 | prep_vma_destroy(vm, vma: op->map.vma, post_commit); |
| 2774 | xe_vma_destroy_unlocked(vma: op->map.vma); |
| 2775 | } |
| 2776 | break; |
| 2777 | case DRM_GPUVA_OP_UNMAP: |
| 2778 | { |
| 2779 | struct xe_vma *vma = gpuva_to_vma(gpuva: op->base.unmap.va); |
| 2780 | |
| 2781 | if (vma) { |
| 2782 | xe_svm_notifier_lock(vm); |
| 2783 | vma->gpuva.flags &= ~XE_VMA_DESTROYED; |
| 2784 | xe_svm_notifier_unlock(vm); |
| 2785 | if (post_commit) |
| 2786 | xe_vm_insert_vma(vm, vma); |
| 2787 | } |
| 2788 | break; |
| 2789 | } |
| 2790 | case DRM_GPUVA_OP_REMAP: |
| 2791 | { |
| 2792 | struct xe_vma *vma = gpuva_to_vma(gpuva: op->base.remap.unmap->va); |
| 2793 | |
| 2794 | if (op->remap.prev) { |
| 2795 | prep_vma_destroy(vm, vma: op->remap.prev, post_commit: prev_post_commit); |
| 2796 | xe_vma_destroy_unlocked(vma: op->remap.prev); |
| 2797 | } |
| 2798 | if (op->remap.next) { |
| 2799 | prep_vma_destroy(vm, vma: op->remap.next, post_commit: next_post_commit); |
| 2800 | xe_vma_destroy_unlocked(vma: op->remap.next); |
| 2801 | } |
| 2802 | if (vma) { |
| 2803 | xe_svm_notifier_lock(vm); |
| 2804 | vma->gpuva.flags &= ~XE_VMA_DESTROYED; |
| 2805 | xe_svm_notifier_unlock(vm); |
| 2806 | if (post_commit) |
| 2807 | xe_vm_insert_vma(vm, vma); |
| 2808 | } |
| 2809 | break; |
| 2810 | } |
| 2811 | case DRM_GPUVA_OP_PREFETCH: |
| 2812 | /* Nothing to do */ |
| 2813 | break; |
| 2814 | default: |
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
| 2816 | } |
| 2817 | } |
| 2818 | |
| 2819 | static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm, |
| 2820 | struct drm_gpuva_ops **ops, |
| 2821 | int num_ops_list) |
| 2822 | { |
| 2823 | int i; |
| 2824 | |
| 2825 | for (i = num_ops_list - 1; i >= 0; --i) { |
| 2826 | struct drm_gpuva_ops *__ops = ops[i]; |
| 2827 | struct drm_gpuva_op *__op; |
| 2828 | |
| 2829 | if (!__ops) |
| 2830 | continue; |
| 2831 | |
| 2832 | drm_gpuva_for_each_op_reverse(__op, __ops) { |
| 2833 | struct xe_vma_op *op = gpuva_op_to_vma_op(op: __op); |
| 2834 | |
| 2835 | xe_vma_op_unwind(vm, op, |
| 2836 | post_commit: op->flags & XE_VMA_OP_COMMITTED, |
| 2837 | prev_post_commit: op->flags & XE_VMA_OP_PREV_COMMITTED, |
| 2838 | next_post_commit: op->flags & XE_VMA_OP_NEXT_COMMITTED); |
| 2839 | } |
| 2840 | } |
| 2841 | } |
| 2842 | |
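/*
 * Lock the VMA's backing BO as part of the drm_exec transaction (VM-private
 * BOs are already covered by the VM resv) and optionally validate it into a
 * GPU-accessible placement.
 */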
| 2843 | static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma, |
| 2844 | bool res_evict, bool validate) |
| 2845 | { |
| 2846 | struct xe_bo *bo = xe_vma_bo(vma); |
| 2847 | struct xe_vm *vm = xe_vma_vm(vma); |
| 2848 | int err = 0; |
| 2849 | |
| 2850 | if (bo) { |
| 2851 | if (!bo->vm) |
| 2852 | err = drm_exec_lock_obj(exec, obj: &bo->ttm.base); |
| 2853 | if (!err && validate) |
| 2854 | err = xe_bo_validate(bo, vm, |
| 2855 | allow_res_evict: !xe_vm_in_preempt_fence_mode(vm) && |
| 2856 | res_evict, exec); |
| 2857 | } |
| 2858 | |
| 2859 | return err; |
| 2860 | } |
| 2861 | |
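/*
 * Fail with -EBUSY if the VMA still carries an unsignalled user fence;
 * otherwise drop the VMA's reference to the signalled fence.
 */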
| 2862 | static int check_ufence(struct xe_vma *vma) |
| 2863 | { |
| 2864 | if (vma->ufence) { |
| 2865 | struct xe_user_fence * const f = vma->ufence; |
| 2866 | |
| 2867 | if (!xe_sync_ufence_get_status(ufence: f)) |
| 2868 | return -EBUSY; |
| 2869 | |
| 2870 | vma->ufence = NULL; |
| 2871 | xe_sync_ufence_put(ufence: f); |
| 2872 | } |
| 2873 | |
| 2874 | return 0; |
| 2875 | } |
| 2876 | |
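/*
 * For a prefetch of a CPU address-mirror VMA, migrate each collected SVM
 * range to the selected tile's VRAM (or back to system memory when no tile
 * was selected) and populate its pages.
 */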
| 2877 | static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op) |
| 2878 | { |
| 2879 | bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP); |
| 2880 | struct xe_vma *vma = gpuva_to_vma(gpuva: op->base.prefetch.va); |
| 2881 | struct xe_tile *tile = op->prefetch_range.tile; |
| 2882 | int err = 0; |
| 2883 | |
| 2884 | struct xe_svm_range *svm_range; |
| 2885 | struct drm_gpusvm_ctx ctx = {}; |
| 2886 | unsigned long i; |
| 2887 | |
| 2888 | if (!xe_vma_is_cpu_addr_mirror(vma)) |
| 2889 | return 0; |
| 2890 | |
| 2891 | ctx.read_only = xe_vma_read_only(vma); |
| 2892 | ctx.devmem_possible = devmem_possible; |
| 2893 | ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0; |
| 2894 | ctx.device_private_page_owner = xe_svm_devm_owner(xe: vm->xe); |
| 2895 | |
| 2896 | /* TODO: Threading the migration */ |
| 2897 | xa_for_each(&op->prefetch_range.range, i, svm_range) { |
| 2898 | if (!tile) |
| 2899 | xe_svm_range_migrate_to_smem(vm, range: svm_range); |
| 2900 | |
| 2901 | if (xe_svm_range_needs_migrate_to_vram(range: svm_range, vma, preferred_region_is_vram: !!tile)) { |
| 2902 | err = xe_svm_alloc_vram(tile, range: svm_range, ctx: &ctx); |
| 2903 | if (err) { |
| 2904 | drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n" , |
| 2905 | vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); |
| 2906 | return -ENODATA; |
| 2907 | } |
| 2908 | xe_svm_range_debug(range: svm_range, operation: "PREFETCH - RANGE MIGRATED TO VRAM" ); |
| 2909 | } |
| 2910 | |
| 2911 | err = xe_svm_range_get_pages(vm, range: svm_range, ctx: &ctx); |
| 2912 | if (err) { |
| 2913 | drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n" , |
| 2914 | vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err)); |
| 2915 | if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) |
| 2916 | err = -ENODATA; |
| 2917 | return err; |
| 2918 | } |
| 2919 | xe_svm_range_debug(range: svm_range, operation: "PREFETCH - RANGE GET PAGES DONE" ); |
| 2920 | } |
| 2921 | |
| 2922 | return err; |
| 2923 | } |
| 2924 | |
| 2925 | static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm, |
| 2926 | struct xe_vma_ops *vops, struct xe_vma_op *op) |
| 2927 | { |
| 2928 | int err = 0; |
| 2929 | bool res_evict; |
| 2930 | |
| 2931 | /* |
| 2932 | * We only allow evicting a BO within the VM if it is not part of an |
| 2933 | * array of binds, as an array of binds can evict another BO within the |
| 2934 | * bind. |
| 2935 | */ |
| 2936 | res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS); |
| 2937 | |
| 2938 | switch (op->base.op) { |
| 2939 | case DRM_GPUVA_OP_MAP: |
| 2940 | if (!op->map.invalidate_on_bind) |
| 2941 | err = vma_lock_and_validate(exec, vma: op->map.vma, |
| 2942 | res_evict, |
| 2943 | validate: !xe_vm_in_fault_mode(vm) || |
| 2944 | op->map.immediate); |
| 2945 | break; |
| 2946 | case DRM_GPUVA_OP_REMAP: |
| 2947 | err = check_ufence(vma: gpuva_to_vma(gpuva: op->base.remap.unmap->va)); |
| 2948 | if (err) |
| 2949 | break; |
| 2950 | |
| 2951 | err = vma_lock_and_validate(exec, |
| 2952 | vma: gpuva_to_vma(gpuva: op->base.remap.unmap->va), |
| 2953 | res_evict, validate: false); |
| 2954 | if (!err && op->remap.prev) |
| 2955 | err = vma_lock_and_validate(exec, vma: op->remap.prev, |
| 2956 | res_evict, validate: true); |
| 2957 | if (!err && op->remap.next) |
| 2958 | err = vma_lock_and_validate(exec, vma: op->remap.next, |
| 2959 | res_evict, validate: true); |
| 2960 | break; |
| 2961 | case DRM_GPUVA_OP_UNMAP: |
| 2962 | err = check_ufence(vma: gpuva_to_vma(gpuva: op->base.unmap.va)); |
| 2963 | if (err) |
| 2964 | break; |
| 2965 | |
| 2966 | err = vma_lock_and_validate(exec, |
| 2967 | vma: gpuva_to_vma(gpuva: op->base.unmap.va), |
| 2968 | res_evict, validate: false); |
| 2969 | break; |
| 2970 | case DRM_GPUVA_OP_PREFETCH: |
| 2971 | { |
| 2972 | struct xe_vma *vma = gpuva_to_vma(gpuva: op->base.prefetch.va); |
| 2973 | u32 region; |
| 2974 | |
| 2975 | if (!xe_vma_is_cpu_addr_mirror(vma)) { |
| 2976 | region = op->prefetch.region; |
| 2977 | xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC || |
| 2978 | region <= ARRAY_SIZE(region_to_mem_type)); |
| 2979 | } |
| 2980 | |
| 2981 | err = vma_lock_and_validate(exec, |
| 2982 | vma: gpuva_to_vma(gpuva: op->base.prefetch.va), |
| 2983 | res_evict, validate: false); |
| 2984 | if (!err && !xe_vma_has_no_bo(vma)) |
| 2985 | err = xe_bo_migrate(bo: xe_vma_bo(vma), |
| 2986 | mem_type: region_to_mem_type[region], |
| 2987 | NULL, |
| 2988 | exec); |
| 2989 | break; |
| 2990 | } |
| 2991 | default: |
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
| 2993 | } |
| 2994 | |
| 2995 | return err; |
| 2996 | } |
| 2997 | |
| 2998 | static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops) |
| 2999 | { |
| 3000 | struct xe_vma_op *op; |
| 3001 | int err; |
| 3002 | |
| 3003 | if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH)) |
| 3004 | return 0; |
| 3005 | |
| 3006 | list_for_each_entry(op, &vops->list, link) { |
| 3007 | if (op->base.op == DRM_GPUVA_OP_PREFETCH) { |
| 3008 | err = prefetch_ranges(vm, op); |
| 3009 | if (err) |
| 3010 | return err; |
| 3011 | } |
| 3012 | } |
| 3013 | |
| 3014 | return 0; |
| 3015 | } |
| 3016 | |
| 3017 | static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec, |
| 3018 | struct xe_vm *vm, |
| 3019 | struct xe_vma_ops *vops) |
| 3020 | { |
| 3021 | struct xe_vma_op *op; |
| 3022 | int err; |
| 3023 | |
| 3024 | err = drm_exec_lock_obj(exec, obj: xe_vm_obj(vm)); |
| 3025 | if (err) |
| 3026 | return err; |
| 3027 | |
| 3028 | list_for_each_entry(op, &vops->list, link) { |
| 3029 | err = op_lock_and_prep(exec, vm, vops, op); |
| 3030 | if (err) |
| 3031 | return err; |
| 3032 | } |
| 3033 | |
| 3034 | #ifdef TEST_VM_OPS_ERROR |
| 3035 | if (vops->inject_error && |
| 3036 | vm->xe->vm_inject_error_position == FORCE_OP_ERROR_LOCK) |
| 3037 | return -ENOSPC; |
| 3038 | #endif |
| 3039 | |
| 3040 | return 0; |
| 3041 | } |
| 3042 | |
| 3043 | static void op_trace(struct xe_vma_op *op) |
| 3044 | { |
| 3045 | switch (op->base.op) { |
| 3046 | case DRM_GPUVA_OP_MAP: |
| 3047 | trace_xe_vma_bind(vma: op->map.vma); |
| 3048 | break; |
| 3049 | case DRM_GPUVA_OP_REMAP: |
| 3050 | trace_xe_vma_unbind(vma: gpuva_to_vma(gpuva: op->base.remap.unmap->va)); |
| 3051 | if (op->remap.prev) |
| 3052 | trace_xe_vma_bind(vma: op->remap.prev); |
| 3053 | if (op->remap.next) |
| 3054 | trace_xe_vma_bind(vma: op->remap.next); |
| 3055 | break; |
| 3056 | case DRM_GPUVA_OP_UNMAP: |
| 3057 | trace_xe_vma_unbind(vma: gpuva_to_vma(gpuva: op->base.unmap.va)); |
| 3058 | break; |
| 3059 | case DRM_GPUVA_OP_PREFETCH: |
| 3060 | trace_xe_vma_bind(vma: gpuva_to_vma(gpuva: op->base.prefetch.va)); |
| 3061 | break; |
| 3062 | case DRM_GPUVA_OP_DRIVER: |
| 3063 | break; |
| 3064 | default: |
		XE_WARN_ON("NOT POSSIBLE");
| 3066 | } |
| 3067 | } |
| 3068 | |
| 3069 | static void trace_xe_vm_ops_execute(struct xe_vma_ops *vops) |
| 3070 | { |
| 3071 | struct xe_vma_op *op; |
| 3072 | |
| 3073 | list_for_each_entry(op, &vops->list, link) |
| 3074 | op_trace(op); |
| 3075 | } |
| 3076 | |
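/*
 * Assign the bind exec queue each tile will use (walking the supplied
 * queue's multi-GT list when one was given, otherwise the VM's default
 * per-tile queue) and return how many tiles have page-table updates to run.
 */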
| 3077 | static int vm_ops_setup_tile_args(struct xe_vm *vm, struct xe_vma_ops *vops) |
| 3078 | { |
| 3079 | struct xe_exec_queue *q = vops->q; |
| 3080 | struct xe_tile *tile; |
| 3081 | int number_tiles = 0; |
| 3082 | u8 id; |
| 3083 | |
| 3084 | for_each_tile(tile, vm->xe, id) { |
| 3085 | if (vops->pt_update_ops[id].num_ops) |
| 3086 | ++number_tiles; |
| 3087 | |
| 3088 | if (vops->pt_update_ops[id].q) |
| 3089 | continue; |
| 3090 | |
| 3091 | if (q) { |
| 3092 | vops->pt_update_ops[id].q = q; |
| 3093 | if (vm->pt_root[id] && !list_empty(head: &q->multi_gt_list)) |
| 3094 | q = list_next_entry(q, multi_gt_list); |
| 3095 | } else { |
| 3096 | vops->pt_update_ops[id].q = vm->q[id]; |
| 3097 | } |
| 3098 | } |
| 3099 | |
| 3100 | return number_tiles; |
| 3101 | } |
| 3102 | |
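/*
 * Prepare and run the page-table updates on every tile that has work,
 * gathering the per-tile bind fences (and, unless TLB waits are skipped,
 * the per-GT TLB invalidation fences) into a single dma_fence_array that is
 * returned to the caller.
 */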
| 3103 | static struct dma_fence *ops_execute(struct xe_vm *vm, |
| 3104 | struct xe_vma_ops *vops) |
| 3105 | { |
| 3106 | struct xe_tile *tile; |
| 3107 | struct dma_fence *fence = NULL; |
| 3108 | struct dma_fence **fences = NULL; |
| 3109 | struct dma_fence_array *cf = NULL; |
| 3110 | int number_tiles = 0, current_fence = 0, n_fence = 0, err; |
| 3111 | u8 id; |
| 3112 | |
| 3113 | number_tiles = vm_ops_setup_tile_args(vm, vops); |
| 3114 | if (number_tiles == 0) |
| 3115 | return ERR_PTR(error: -ENODATA); |
| 3116 | |
| 3117 | if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) { |
| 3118 | for_each_tile(tile, vm->xe, id) |
| 3119 | ++n_fence; |
| 3120 | } else { |
| 3121 | for_each_tile(tile, vm->xe, id) |
| 3122 | n_fence += (1 + XE_MAX_GT_PER_TILE); |
| 3123 | } |
| 3124 | |
| 3125 | fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL); |
| 3126 | if (!fences) { |
| 3127 | fence = ERR_PTR(error: -ENOMEM); |
| 3128 | goto err_trace; |
| 3129 | } |
| 3130 | |
| 3131 | cf = dma_fence_array_alloc(num_fences: n_fence); |
| 3132 | if (!cf) { |
| 3133 | fence = ERR_PTR(error: -ENOMEM); |
| 3134 | goto err_out; |
| 3135 | } |
| 3136 | |
| 3137 | for_each_tile(tile, vm->xe, id) { |
| 3138 | if (!vops->pt_update_ops[id].num_ops) |
| 3139 | continue; |
| 3140 | |
| 3141 | err = xe_pt_update_ops_prepare(tile, vops); |
| 3142 | if (err) { |
| 3143 | fence = ERR_PTR(error: err); |
| 3144 | goto err_out; |
| 3145 | } |
| 3146 | } |
| 3147 | |
| 3148 | trace_xe_vm_ops_execute(vops); |
| 3149 | |
| 3150 | for_each_tile(tile, vm->xe, id) { |
| 3151 | struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q; |
| 3152 | int i; |
| 3153 | |
| 3154 | fence = NULL; |
| 3155 | if (!vops->pt_update_ops[id].num_ops) |
| 3156 | goto collect_fences; |
| 3157 | |
| 3158 | fence = xe_pt_update_ops_run(tile, vops); |
| 3159 | if (IS_ERR(ptr: fence)) |
| 3160 | goto err_out; |
| 3161 | |
| 3162 | collect_fences: |
| 3163 | fences[current_fence++] = fence ?: dma_fence_get_stub(); |
| 3164 | if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) |
| 3165 | continue; |
| 3166 | |
| 3167 | xe_migrate_job_lock(m: tile->migrate, q); |
| 3168 | for_each_tlb_inval(i) |
| 3169 | fences[current_fence++] = |
| 3170 | xe_exec_queue_tlb_inval_last_fence_get(q, vm, type: i); |
| 3171 | xe_migrate_job_unlock(m: tile->migrate, q); |
| 3172 | } |
| 3173 | |
| 3174 | xe_assert(vm->xe, current_fence == n_fence); |
| 3175 | dma_fence_array_init(array: cf, num_fences: n_fence, fences, context: dma_fence_context_alloc(num: 1), |
| 3176 | seqno: 1, signal_on_any: false); |
| 3177 | fence = &cf->base; |
| 3178 | |
| 3179 | for_each_tile(tile, vm->xe, id) { |
| 3180 | if (!vops->pt_update_ops[id].num_ops) |
| 3181 | continue; |
| 3182 | |
| 3183 | xe_pt_update_ops_fini(tile, vops); |
| 3184 | } |
| 3185 | |
| 3186 | return fence; |
| 3187 | |
| 3188 | err_out: |
| 3189 | for_each_tile(tile, vm->xe, id) { |
| 3190 | if (!vops->pt_update_ops[id].num_ops) |
| 3191 | continue; |
| 3192 | |
| 3193 | xe_pt_update_ops_abort(tile, vops); |
| 3194 | } |
| 3195 | while (current_fence) |
| 3196 | dma_fence_put(fence: fences[--current_fence]); |
| 3197 | kfree(objp: fences); |
| 3198 | kfree(objp: cf); |
| 3199 | |
| 3200 | err_trace: |
| 3201 | trace_xe_vm_ops_fail(vm); |
| 3202 | return fence; |
| 3203 | } |
| 3204 | |
| 3205 | static void vma_add_ufence(struct xe_vma *vma, struct xe_user_fence *ufence) |
| 3206 | { |
| 3207 | if (vma->ufence) |
| 3208 | xe_sync_ufence_put(ufence: vma->ufence); |
| 3209 | vma->ufence = __xe_sync_ufence_get(ufence); |
| 3210 | } |
| 3211 | |
| 3212 | static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op, |
| 3213 | struct xe_user_fence *ufence) |
| 3214 | { |
| 3215 | switch (op->base.op) { |
| 3216 | case DRM_GPUVA_OP_MAP: |
| 3217 | vma_add_ufence(vma: op->map.vma, ufence); |
| 3218 | break; |
| 3219 | case DRM_GPUVA_OP_REMAP: |
| 3220 | if (op->remap.prev) |
| 3221 | vma_add_ufence(vma: op->remap.prev, ufence); |
| 3222 | if (op->remap.next) |
| 3223 | vma_add_ufence(vma: op->remap.next, ufence); |
| 3224 | break; |
| 3225 | case DRM_GPUVA_OP_UNMAP: |
| 3226 | break; |
| 3227 | case DRM_GPUVA_OP_PREFETCH: |
| 3228 | vma_add_ufence(vma: gpuva_to_vma(gpuva: op->base.prefetch.va), ufence); |
| 3229 | break; |
| 3230 | default: |
		drm_warn(&vm->xe->drm, "NOT POSSIBLE");
| 3232 | } |
| 3233 | } |
| 3234 | |
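/*
 * Finalize a bind after execution: attach the user fence (if any) to the
 * affected VMAs, queue destruction of the VMAs removed by UNMAP/REMAP
 * behind @fence, and signal the ioctl's sync entries.
 */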
| 3235 | static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops, |
| 3236 | struct dma_fence *fence) |
| 3237 | { |
| 3238 | struct xe_user_fence *ufence; |
| 3239 | struct xe_vma_op *op; |
| 3240 | int i; |
| 3241 | |
| 3242 | ufence = find_ufence_get(syncs: vops->syncs, num_syncs: vops->num_syncs); |
| 3243 | list_for_each_entry(op, &vops->list, link) { |
| 3244 | if (ufence) |
| 3245 | op_add_ufence(vm, op, ufence); |
| 3246 | |
| 3247 | if (op->base.op == DRM_GPUVA_OP_UNMAP) |
| 3248 | xe_vma_destroy(vma: gpuva_to_vma(gpuva: op->base.unmap.va), fence); |
| 3249 | else if (op->base.op == DRM_GPUVA_OP_REMAP) |
| 3250 | xe_vma_destroy(vma: gpuva_to_vma(gpuva: op->base.remap.unmap->va), |
| 3251 | fence); |
| 3252 | } |
| 3253 | if (ufence) |
| 3254 | xe_sync_ufence_put(ufence); |
| 3255 | if (fence) { |
| 3256 | for (i = 0; i < vops->num_syncs; i++) |
| 3257 | xe_sync_entry_signal(sync: vops->syncs + i, fence); |
| 3258 | } |
| 3259 | } |
| 3260 | |
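/*
 * Lock and prepare everything the bind needs inside a validation
 * transaction, then execute the operations. Returns the composite bind
 * fence on success or an ERR_PTR on failure; -ENODATA (nothing to do)
 * still finalizes the operations before being propagated.
 */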
| 3261 | static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm, |
| 3262 | struct xe_vma_ops *vops) |
| 3263 | { |
| 3264 | struct xe_validation_ctx ctx; |
| 3265 | struct drm_exec exec; |
| 3266 | struct dma_fence *fence; |
| 3267 | int err = 0; |
| 3268 | |
| 3269 | lockdep_assert_held_write(&vm->lock); |
| 3270 | |
| 3271 | xe_validation_guard(&ctx, &vm->xe->val, &exec, |
| 3272 | ((struct xe_val_flags) { |
| 3273 | .interruptible = true, |
| 3274 | .exec_ignore_duplicates = true, |
| 3275 | }), err) { |
| 3276 | err = vm_bind_ioctl_ops_lock_and_prep(exec: &exec, vm, vops); |
| 3277 | drm_exec_retry_on_contention(&exec); |
| 3278 | xe_validation_retry_on_oom(&ctx, &err); |
| 3279 | if (err) |
| 3280 | return ERR_PTR(error: err); |
| 3281 | |
| 3282 | xe_vm_set_validation_exec(vm, exec: &exec); |
| 3283 | fence = ops_execute(vm, vops); |
| 3284 | xe_vm_set_validation_exec(vm, NULL); |
| 3285 | if (IS_ERR(ptr: fence)) { |
| 3286 | if (PTR_ERR(ptr: fence) == -ENODATA) |
| 3287 | vm_bind_ioctl_ops_fini(vm, vops, NULL); |
| 3288 | return fence; |
| 3289 | } |
| 3290 | |
| 3291 | vm_bind_ioctl_ops_fini(vm, vops, fence); |
| 3292 | } |
| 3293 | |
| 3294 | return err ? ERR_PTR(error: err) : fence; |
| 3295 | } |
| 3296 | ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO); |
| 3297 | |
| 3298 | #define SUPPORTED_FLAGS_STUB \ |
| 3299 | (DRM_XE_VM_BIND_FLAG_READONLY | \ |
| 3300 | DRM_XE_VM_BIND_FLAG_IMMEDIATE | \ |
| 3301 | DRM_XE_VM_BIND_FLAG_NULL | \ |
| 3302 | DRM_XE_VM_BIND_FLAG_DUMPABLE | \ |
| 3303 | DRM_XE_VM_BIND_FLAG_CHECK_PXP | \ |
| 3304 | DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \ |
| 3305 | DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) |
| 3306 | |
| 3307 | #ifdef TEST_VM_OPS_ERROR |
| 3308 | #define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR) |
| 3309 | #else |
| 3310 | #define SUPPORTED_FLAGS SUPPORTED_FLAGS_STUB |
| 3311 | #endif |
| 3312 | |
| 3313 | #define XE_64K_PAGE_MASK 0xffffull |
| 3314 | #define ALL_DRM_XE_SYNCS_FLAGS (DRM_XE_SYNCS_FLAG_WAIT_FOR_OP) |
| 3315 | |
| 3316 | static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm, |
| 3317 | struct drm_xe_vm_bind *args, |
| 3318 | struct drm_xe_vm_bind_op **bind_ops) |
| 3319 | { |
| 3320 | int err; |
| 3321 | int i; |
| 3322 | |
| 3323 | if (XE_IOCTL_DBG(xe, args->pad || args->pad2) || |
| 3324 | XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1])) |
| 3325 | return -EINVAL; |
| 3326 | |
| 3327 | if (XE_IOCTL_DBG(xe, args->extensions)) |
| 3328 | return -EINVAL; |
| 3329 | |
| 3330 | if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS)) |
| 3331 | return -EINVAL; |
| 3332 | |
| 3333 | if (args->num_binds > 1) { |
| 3334 | u64 __user *bind_user = |
| 3335 | u64_to_user_ptr(args->vector_of_binds); |
| 3336 | |
| 3337 | *bind_ops = kvmalloc_array(args->num_binds, |
| 3338 | sizeof(struct drm_xe_vm_bind_op), |
| 3339 | GFP_KERNEL | __GFP_ACCOUNT | |
| 3340 | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
| 3341 | if (!*bind_ops) |
| 3342 | return args->num_binds > 1 ? -ENOBUFS : -ENOMEM; |
| 3343 | |
| 3344 | err = copy_from_user(to: *bind_ops, from: bind_user, |
| 3345 | n: sizeof(struct drm_xe_vm_bind_op) * |
| 3346 | args->num_binds); |
| 3347 | if (XE_IOCTL_DBG(xe, err)) { |
| 3348 | err = -EFAULT; |
| 3349 | goto free_bind_ops; |
| 3350 | } |
| 3351 | } else { |
| 3352 | *bind_ops = &args->bind; |
| 3353 | } |
| 3354 | |
| 3355 | for (i = 0; i < args->num_binds; ++i) { |
| 3356 | u64 range = (*bind_ops)[i].range; |
| 3357 | u64 addr = (*bind_ops)[i].addr; |
| 3358 | u32 op = (*bind_ops)[i].op; |
| 3359 | u32 flags = (*bind_ops)[i].flags; |
| 3360 | u32 obj = (*bind_ops)[i].obj; |
| 3361 | u64 obj_offset = (*bind_ops)[i].obj_offset; |
| 3362 | u32 prefetch_region = (*bind_ops)[i].prefetch_mem_region_instance; |
| 3363 | bool is_null = flags & DRM_XE_VM_BIND_FLAG_NULL; |
| 3364 | bool is_cpu_addr_mirror = flags & |
| 3365 | DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR; |
| 3366 | u16 pat_index = (*bind_ops)[i].pat_index; |
| 3367 | u16 coh_mode; |
| 3368 | |
| 3369 | if (XE_IOCTL_DBG(xe, is_cpu_addr_mirror && |
| 3370 | (!xe_vm_in_fault_mode(vm) || |
| 3371 | !IS_ENABLED(CONFIG_DRM_XE_GPUSVM)))) { |
| 3372 | err = -EINVAL; |
| 3373 | goto free_bind_ops; |
| 3374 | } |
| 3375 | |
| 3376 | if (XE_IOCTL_DBG(xe, pat_index >= xe->pat.n_entries)) { |
| 3377 | err = -EINVAL; |
| 3378 | goto free_bind_ops; |
| 3379 | } |
| 3380 | |
| 3381 | pat_index = array_index_nospec(pat_index, xe->pat.n_entries); |
| 3382 | (*bind_ops)[i].pat_index = pat_index; |
| 3383 | coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); |
| 3384 | if (XE_IOCTL_DBG(xe, !coh_mode)) { /* hw reserved */ |
| 3385 | err = -EINVAL; |
| 3386 | goto free_bind_ops; |
| 3387 | } |
| 3388 | |
| 3389 | if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY)) { |
| 3390 | err = -EINVAL; |
| 3391 | goto free_bind_ops; |
| 3392 | } |
| 3393 | |
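/*
* Reject op/flag/object combinations the uAPI does not allow, e.g. NULL
* or CPU-address-mirror bindings backed by a BO, or a prefetch to a
* memory region this device does not have.
*/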
| 3394 | if (XE_IOCTL_DBG(xe, op > DRM_XE_VM_BIND_OP_PREFETCH) || |
| 3395 | XE_IOCTL_DBG(xe, flags & ~SUPPORTED_FLAGS) || |
| 3396 | XE_IOCTL_DBG(xe, obj && (is_null || is_cpu_addr_mirror)) || |
| 3397 | XE_IOCTL_DBG(xe, obj_offset && (is_null || |
| 3398 | is_cpu_addr_mirror)) || |
| 3399 | XE_IOCTL_DBG(xe, op != DRM_XE_VM_BIND_OP_MAP && |
| 3400 | (is_null || is_cpu_addr_mirror)) || |
| 3401 | XE_IOCTL_DBG(xe, !obj && |
| 3402 | op == DRM_XE_VM_BIND_OP_MAP && |
| 3403 | !is_null && !is_cpu_addr_mirror) || |
| 3404 | XE_IOCTL_DBG(xe, !obj && |
| 3405 | op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || |
| 3406 | XE_IOCTL_DBG(xe, addr && |
| 3407 | op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || |
| 3408 | XE_IOCTL_DBG(xe, range && |
| 3409 | op == DRM_XE_VM_BIND_OP_UNMAP_ALL) || |
| 3410 | XE_IOCTL_DBG(xe, obj && |
| 3411 | op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || |
| 3412 | XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && |
| 3413 | op == DRM_XE_VM_BIND_OP_MAP_USERPTR) || |
| 3414 | XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR && |
| 3415 | !IS_ENABLED(CONFIG_DRM_GPUSVM)) || |
| 3416 | XE_IOCTL_DBG(xe, obj && |
| 3417 | op == DRM_XE_VM_BIND_OP_PREFETCH) || |
| 3418 | XE_IOCTL_DBG(xe, prefetch_region && |
| 3419 | op != DRM_XE_VM_BIND_OP_PREFETCH) || |
| 3420 | XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC && |
| 3421 | /* Guard against undefined shift in BIT(prefetch_region) */ |
| 3422 | (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) || |
| 3423 | !(BIT(prefetch_region) & xe->info.mem_region_mask)))) || |
| 3424 | XE_IOCTL_DBG(xe, obj && |
| 3425 | op == DRM_XE_VM_BIND_OP_UNMAP) || |
| 3426 | XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) && |
| 3427 | (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) { |
| 3428 | err = -EINVAL; |
| 3429 | goto free_bind_ops; |
| 3430 | } |
| 3431 | |
| 3432 | if (XE_IOCTL_DBG(xe, obj_offset & ~PAGE_MASK) || |
| 3433 | XE_IOCTL_DBG(xe, addr & ~PAGE_MASK) || |
| 3434 | XE_IOCTL_DBG(xe, range & ~PAGE_MASK) || |
| 3435 | XE_IOCTL_DBG(xe, !range && |
| 3436 | op != DRM_XE_VM_BIND_OP_UNMAP_ALL)) { |
| 3437 | err = -EINVAL; |
| 3438 | goto free_bind_ops; |
| 3439 | } |
| 3440 | } |
| 3441 | |
| 3442 | return 0; |
| 3443 | |
| 3444 | free_bind_ops: |
| 3445 | if (args->num_binds > 1) |
| 3446 | kvfree(addr: *bind_ops); |
| 3447 | *bind_ops = NULL; |
| 3448 | return err; |
| 3449 | } |
| 3450 | |
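/*
* Used when there are no bind ops to execute: build a fence from the input
* syncs and immediately signal the out-syncs with it so completion is still
* observable.
*/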
| 3451 | static int vm_bind_ioctl_signal_fences(struct xe_vm *vm, |
| 3452 | struct xe_exec_queue *q, |
| 3453 | struct xe_sync_entry *syncs, |
| 3454 | int num_syncs) |
| 3455 | { |
| 3456 | struct dma_fence *fence = NULL; |
| 3457 | int i, err = 0; |
| 3458 | |
| 3459 | if (num_syncs) { |
| 3460 | fence = xe_sync_in_fence_get(sync: syncs, num_sync: num_syncs, |
| 3461 | q: to_wait_exec_queue(vm, q), vm); |
| 3462 | if (IS_ERR(ptr: fence)) |
| 3463 | return PTR_ERR(ptr: fence); |
| 3464 | |
| 3465 | for (i = 0; i < num_syncs; i++) |
| 3466 | xe_sync_entry_signal(sync: &syncs[i], fence); |
| 3467 | } |
| 3468 | |
| 3469 | dma_fence_put(fence); |
| 3470 | |
| 3471 | return err; |
| 3472 | } |
| 3473 | |
| 3474 | static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm, |
| 3475 | struct xe_exec_queue *q, |
| 3476 | struct xe_sync_entry *syncs, u32 num_syncs) |
| 3477 | { |
| 3478 | memset(vops, 0, sizeof(*vops)); |
| 3479 | INIT_LIST_HEAD(list: &vops->list); |
| 3480 | vops->vm = vm; |
| 3481 | vops->q = q; |
| 3482 | vops->syncs = syncs; |
| 3483 | vops->num_syncs = num_syncs; |
| 3484 | vops->flags = 0; |
| 3485 | } |
| 3486 | |
| 3487 | static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo, |
| 3488 | u64 addr, u64 range, u64 obj_offset, |
| 3489 | u16 pat_index, u32 op, u32 bind_flags) |
| 3490 | { |
| 3491 | u16 coh_mode; |
| 3492 | |
| 3493 | if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) || |
| 3494 | XE_IOCTL_DBG(xe, obj_offset > |
| 3495 | xe_bo_size(bo) - range)) { |
| 3496 | return -EINVAL; |
| 3497 | } |
| 3498 | |
| 3499 | /* |
| 3500 | * Some platforms require 64k VM_BIND alignment, |
| 3501 | * specifically those with XE_VRAM_FLAGS_NEED64K. |
| 3502 | * |
* Other platforms may have BOs set to 64k physical placement,
| 3504 | * but can be mapped at 4k offsets anyway. This check is only |
| 3505 | * there for the former case. |
| 3506 | */ |
| 3507 | if ((bo->flags & XE_BO_FLAG_INTERNAL_64K) && |
| 3508 | (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)) { |
| 3509 | if (XE_IOCTL_DBG(xe, obj_offset & |
| 3510 | XE_64K_PAGE_MASK) || |
| 3511 | XE_IOCTL_DBG(xe, addr & XE_64K_PAGE_MASK) || |
| 3512 | XE_IOCTL_DBG(xe, range & XE_64K_PAGE_MASK)) { |
| 3513 | return -EINVAL; |
| 3514 | } |
| 3515 | } |
| 3516 | |
| 3517 | coh_mode = xe_pat_index_get_coh_mode(xe, pat_index); |
| 3518 | if (bo->cpu_caching) { |
| 3519 | if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE && |
| 3520 | bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)) { |
| 3521 | return -EINVAL; |
| 3522 | } |
| 3523 | } else if (XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE)) { |
| 3524 | /* |
* An imported dma-buf from a different device should require 1-way
* or 2-way coherency since we don't know how it was mapped on the
* CPU. Just assume it is potentially cached on the CPU side.
| 3529 | */ |
| 3530 | return -EINVAL; |
| 3531 | } |
| 3532 | |
| 3533 | /* If a BO is protected it can only be mapped if the key is still valid */ |
| 3534 | if ((bind_flags & DRM_XE_VM_BIND_FLAG_CHECK_PXP) && xe_bo_is_protected(bo) && |
| 3535 | op != DRM_XE_VM_BIND_OP_UNMAP && op != DRM_XE_VM_BIND_OP_UNMAP_ALL) |
| 3536 | if (XE_IOCTL_DBG(xe, xe_pxp_bo_key_check(xe->pxp, bo) != 0)) |
| 3537 | return -ENOEXEC; |
| 3538 | |
| 3539 | return 0; |
| 3540 | } |
| 3541 | |
| 3542 | int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file) |
| 3543 | { |
| 3544 | struct xe_device *xe = to_xe_device(dev); |
| 3545 | struct xe_file *xef = to_xe_file(file); |
| 3546 | struct drm_xe_vm_bind *args = data; |
| 3547 | struct drm_xe_sync __user *syncs_user; |
| 3548 | struct xe_bo **bos = NULL; |
| 3549 | struct drm_gpuva_ops **ops = NULL; |
| 3550 | struct xe_vm *vm; |
| 3551 | struct xe_exec_queue *q = NULL; |
| 3552 | u32 num_syncs, num_ufence = 0; |
| 3553 | struct xe_sync_entry *syncs = NULL; |
| 3554 | struct drm_xe_vm_bind_op *bind_ops = NULL; |
| 3555 | struct xe_vma_ops vops; |
| 3556 | struct dma_fence *fence; |
| 3557 | int err; |
| 3558 | int i; |
| 3559 | |
| 3560 | vm = xe_vm_lookup(xef, id: args->vm_id); |
| 3561 | if (XE_IOCTL_DBG(xe, !vm)) |
| 3562 | return -EINVAL; |
| 3563 | |
| 3564 | err = vm_bind_ioctl_check_args(xe, vm, args, bind_ops: &bind_ops); |
| 3565 | if (err) |
| 3566 | goto put_vm; |
| 3567 | |
| 3568 | if (args->exec_queue_id) { |
| 3569 | q = xe_exec_queue_lookup(xef, id: args->exec_queue_id); |
| 3570 | if (XE_IOCTL_DBG(xe, !q)) { |
| 3571 | err = -ENOENT; |
| 3572 | goto free_bind_ops; |
| 3573 | } |
| 3574 | |
| 3575 | if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) { |
| 3576 | err = -EINVAL; |
| 3577 | goto put_exec_queue; |
| 3578 | } |
| 3579 | } |
| 3580 | |
| 3581 | if (XE_IOCTL_DBG(xe, q && vm != q->user_vm)) { |
| 3582 | err = -EINVAL; |
| 3583 | goto put_exec_queue; |
| 3584 | } |
| 3585 | |
| 3586 | /* Ensure all UNMAPs visible */ |
| 3587 | xe_svm_flush(vm); |
| 3588 | |
| 3589 | err = down_write_killable(sem: &vm->lock); |
| 3590 | if (err) |
| 3591 | goto put_exec_queue; |
| 3592 | |
| 3593 | if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) { |
| 3594 | err = -ENOENT; |
| 3595 | goto release_vm_lock; |
| 3596 | } |
| 3597 | |
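/* Check that every bind stays within the VM's address space. */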
| 3598 | for (i = 0; i < args->num_binds; ++i) { |
| 3599 | u64 range = bind_ops[i].range; |
| 3600 | u64 addr = bind_ops[i].addr; |
| 3601 | |
| 3602 | if (XE_IOCTL_DBG(xe, range > vm->size) || |
| 3603 | XE_IOCTL_DBG(xe, addr > vm->size - range)) { |
| 3604 | err = -EINVAL; |
| 3605 | goto release_vm_lock; |
| 3606 | } |
| 3607 | } |
| 3608 | |
| 3609 | if (args->num_binds) { |
| 3610 | bos = kvcalloc(args->num_binds, sizeof(*bos), |
| 3611 | GFP_KERNEL | __GFP_ACCOUNT | |
| 3612 | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
| 3613 | if (!bos) { |
| 3614 | err = -ENOMEM; |
| 3615 | goto release_vm_lock; |
| 3616 | } |
| 3617 | |
| 3618 | ops = kvcalloc(args->num_binds, sizeof(*ops), |
| 3619 | GFP_KERNEL | __GFP_ACCOUNT | |
| 3620 | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); |
| 3621 | if (!ops) { |
| 3622 | err = -ENOMEM; |
| 3623 | goto free_bos; |
| 3624 | } |
| 3625 | } |
| 3626 | |
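/* Look up the GEM object for each bind op and validate it against the op. */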
| 3627 | for (i = 0; i < args->num_binds; ++i) { |
| 3628 | struct drm_gem_object *gem_obj; |
| 3629 | u64 range = bind_ops[i].range; |
| 3630 | u64 addr = bind_ops[i].addr; |
| 3631 | u32 obj = bind_ops[i].obj; |
| 3632 | u64 obj_offset = bind_ops[i].obj_offset; |
| 3633 | u16 pat_index = bind_ops[i].pat_index; |
| 3634 | u32 op = bind_ops[i].op; |
| 3635 | u32 bind_flags = bind_ops[i].flags; |
| 3636 | |
| 3637 | if (!obj) |
| 3638 | continue; |
| 3639 | |
| 3640 | gem_obj = drm_gem_object_lookup(filp: file, handle: obj); |
| 3641 | if (XE_IOCTL_DBG(xe, !gem_obj)) { |
| 3642 | err = -ENOENT; |
| 3643 | goto put_obj; |
| 3644 | } |
| 3645 | bos[i] = gem_to_xe_bo(obj: gem_obj); |
| 3646 | |
| 3647 | err = xe_vm_bind_ioctl_validate_bo(xe, bo: bos[i], addr, range, |
| 3648 | obj_offset, pat_index, op, |
| 3649 | bind_flags); |
| 3650 | if (err) |
| 3651 | goto put_obj; |
| 3652 | } |
| 3653 | |
| 3654 | if (args->num_syncs) { |
| 3655 | syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL); |
| 3656 | if (!syncs) { |
| 3657 | err = -ENOMEM; |
| 3658 | goto put_obj; |
| 3659 | } |
| 3660 | } |
| 3661 | |
| 3662 | syncs_user = u64_to_user_ptr(args->syncs); |
| 3663 | for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) { |
| 3664 | struct xe_exec_queue *__q = q ?: vm->q[0]; |
| 3665 | |
| 3666 | err = xe_sync_entry_parse(xe, xef, sync: &syncs[num_syncs], |
| 3667 | sync_user: &syncs_user[num_syncs], |
| 3668 | ufence_syncobj: __q->ufence_syncobj, |
| 3669 | ufence_timeline_value: ++__q->ufence_timeline_value, |
| 3670 | flags: (xe_vm_in_lr_mode(vm) ? |
| 3671 | SYNC_PARSE_FLAG_LR_MODE : 0) | |
| 3672 | (!args->num_binds ? |
| 3673 | SYNC_PARSE_FLAG_DISALLOW_USER_FENCE : 0)); |
| 3674 | if (err) |
| 3675 | goto free_syncs; |
| 3676 | |
| 3677 | if (xe_sync_is_ufence(sync: &syncs[num_syncs])) |
| 3678 | num_ufence++; |
| 3679 | } |
| 3680 | |
| 3681 | if (XE_IOCTL_DBG(xe, num_ufence > 1)) { |
| 3682 | err = -EINVAL; |
| 3683 | goto free_syncs; |
| 3684 | } |
| 3685 | |
| 3686 | if (!args->num_binds) { |
| 3687 | err = -ENODATA; |
| 3688 | goto free_syncs; |
| 3689 | } |
| 3690 | |
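/*
* Convert each bind op into GPUVA ops and parse them into a single
* xe_vma_ops list so they are executed and unwound together.
*/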
| 3691 | xe_vma_ops_init(vops: &vops, vm, q, syncs, num_syncs); |
| 3692 | if (args->num_binds > 1) |
| 3693 | vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS; |
| 3694 | for (i = 0; i < args->num_binds; ++i) { |
| 3695 | u64 range = bind_ops[i].range; |
| 3696 | u64 addr = bind_ops[i].addr; |
| 3697 | u32 op = bind_ops[i].op; |
| 3698 | u32 flags = bind_ops[i].flags; |
| 3699 | u64 obj_offset = bind_ops[i].obj_offset; |
| 3700 | u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance; |
| 3701 | u16 pat_index = bind_ops[i].pat_index; |
| 3702 | |
| 3703 | ops[i] = vm_bind_ioctl_ops_create(vm, vops: &vops, bo: bos[i], bo_offset_or_userptr: obj_offset, |
| 3704 | addr, range, operation: op, flags, |
| 3705 | prefetch_region, pat_index); |
| 3706 | if (IS_ERR(ptr: ops[i])) { |
| 3707 | err = PTR_ERR(ptr: ops[i]); |
| 3708 | ops[i] = NULL; |
| 3709 | goto unwind_ops; |
| 3710 | } |
| 3711 | |
| 3712 | err = vm_bind_ioctl_ops_parse(vm, ops: ops[i], vops: &vops); |
| 3713 | if (err) |
| 3714 | goto unwind_ops; |
| 3715 | |
| 3716 | #ifdef TEST_VM_OPS_ERROR |
| 3717 | if (flags & FORCE_OP_ERROR) { |
| 3718 | vops.inject_error = true; |
| 3719 | vm->xe->vm_inject_error_position = |
| 3720 | (vm->xe->vm_inject_error_position + 1) % |
| 3721 | FORCE_OP_ERROR_COUNT; |
| 3722 | } |
| 3723 | #endif |
| 3724 | } |
| 3725 | |
| 3726 | /* Nothing to do */ |
| 3727 | if (list_empty(head: &vops.list)) { |
| 3728 | err = -ENODATA; |
| 3729 | goto unwind_ops; |
| 3730 | } |
| 3731 | |
| 3732 | err = xe_vma_ops_alloc(vops: &vops, array_of_binds: args->num_binds > 1); |
| 3733 | if (err) |
| 3734 | goto unwind_ops; |
| 3735 | |
| 3736 | err = vm_bind_ioctl_ops_prefetch_ranges(vm, vops: &vops); |
| 3737 | if (err) |
| 3738 | goto unwind_ops; |
| 3739 | |
| 3740 | fence = vm_bind_ioctl_ops_execute(vm, vops: &vops); |
| 3741 | if (IS_ERR(ptr: fence)) |
| 3742 | err = PTR_ERR(ptr: fence); |
| 3743 | else |
| 3744 | dma_fence_put(fence); |
| 3745 | |
| 3746 | unwind_ops: |
| 3747 | if (err && err != -ENODATA) |
| 3748 | vm_bind_ioctl_ops_unwind(vm, ops, num_ops_list: args->num_binds); |
| 3749 | xe_vma_ops_fini(vops: &vops); |
| 3750 | for (i = args->num_binds - 1; i >= 0; --i) |
| 3751 | if (ops[i]) |
| 3752 | drm_gpuva_ops_free(gpuvm: &vm->gpuvm, ops: ops[i]); |
| 3753 | free_syncs: |
| 3754 | if (err == -ENODATA) |
| 3755 | err = vm_bind_ioctl_signal_fences(vm, q, syncs, num_syncs); |
| 3756 | while (num_syncs--) |
| 3757 | xe_sync_entry_cleanup(sync: &syncs[num_syncs]); |
| 3758 | |
| 3759 | kfree(objp: syncs); |
| 3760 | put_obj: |
| 3761 | for (i = 0; i < args->num_binds; ++i) |
| 3762 | xe_bo_put(bo: bos[i]); |
| 3763 | |
| 3764 | kvfree(addr: ops); |
| 3765 | free_bos: |
| 3766 | kvfree(addr: bos); |
| 3767 | release_vm_lock: |
| 3768 | up_write(sem: &vm->lock); |
| 3769 | put_exec_queue: |
| 3770 | if (q) |
| 3771 | xe_exec_queue_put(q); |
| 3772 | free_bind_ops: |
| 3773 | if (args->num_binds > 1) |
| 3774 | kvfree(addr: bind_ops); |
| 3775 | put_vm: |
| 3776 | xe_vm_put(vm); |
| 3777 | return err; |
| 3778 | } |
| 3779 | |
| 3780 | /** |
| 3781 | * xe_vm_bind_kernel_bo - bind a kernel BO to a VM |
| 3782 | * @vm: VM to bind the BO to |
| 3783 | * @bo: BO to bind |
| 3784 | * @q: exec queue to use for the bind (optional) |
| 3785 | * @addr: address at which to bind the BO |
| 3786 | * @cache_lvl: PAT cache level to use |
| 3787 | * |
| 3788 | * Execute a VM bind map operation on a kernel-owned BO to bind it into a |
| 3789 | * kernel-owned VM. |
| 3790 | * |
| 3791 | * Returns a dma_fence to track the binding completion if the job to do so was |
| 3792 | * successfully submitted, an error pointer otherwise. |
| 3793 | */ |
| 3794 | struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo, |
| 3795 | struct xe_exec_queue *q, u64 addr, |
| 3796 | enum xe_cache_level cache_lvl) |
| 3797 | { |
| 3798 | struct xe_vma_ops vops; |
| 3799 | struct drm_gpuva_ops *ops = NULL; |
| 3800 | struct dma_fence *fence; |
| 3801 | int err; |
| 3802 | |
| 3803 | xe_bo_get(bo); |
| 3804 | xe_vm_get(vm); |
| 3805 | if (q) |
| 3806 | xe_exec_queue_get(q); |
| 3807 | |
| 3808 | down_write(sem: &vm->lock); |
| 3809 | |
| 3810 | xe_vma_ops_init(vops: &vops, vm, q, NULL, num_syncs: 0); |
| 3811 | |
| 3812 | ops = vm_bind_ioctl_ops_create(vm, vops: &vops, bo, bo_offset_or_userptr: 0, addr, range: xe_bo_size(bo), |
| 3813 | DRM_XE_VM_BIND_OP_MAP, flags: 0, prefetch_region: 0, |
| 3814 | pat_index: vm->xe->pat.idx[cache_lvl]); |
| 3815 | if (IS_ERR(ptr: ops)) { |
| 3816 | err = PTR_ERR(ptr: ops); |
| 3817 | goto release_vm_lock; |
| 3818 | } |
| 3819 | |
| 3820 | err = vm_bind_ioctl_ops_parse(vm, ops, vops: &vops); |
| 3821 | if (err) |
| 3822 | goto release_vm_lock; |
| 3823 | |
| 3824 | xe_assert(vm->xe, !list_empty(&vops.list)); |
| 3825 | |
| 3826 | err = xe_vma_ops_alloc(vops: &vops, array_of_binds: false); |
| 3827 | if (err) |
| 3828 | goto unwind_ops; |
| 3829 | |
| 3830 | fence = vm_bind_ioctl_ops_execute(vm, vops: &vops); |
| 3831 | if (IS_ERR(ptr: fence)) |
| 3832 | err = PTR_ERR(ptr: fence); |
| 3833 | |
| 3834 | unwind_ops: |
| 3835 | if (err && err != -ENODATA) |
| 3836 | vm_bind_ioctl_ops_unwind(vm, ops: &ops, num_ops_list: 1); |
| 3837 | |
| 3838 | xe_vma_ops_fini(vops: &vops); |
| 3839 | drm_gpuva_ops_free(gpuvm: &vm->gpuvm, ops); |
| 3840 | |
| 3841 | release_vm_lock: |
| 3842 | up_write(sem: &vm->lock); |
| 3843 | |
| 3844 | if (q) |
| 3845 | xe_exec_queue_put(q); |
| 3846 | xe_vm_put(vm); |
| 3847 | xe_bo_put(bo); |
| 3848 | |
| 3849 | if (err) |
| 3850 | fence = ERR_PTR(error: err); |
| 3851 | |
| 3852 | return fence; |
| 3853 | } |
| 3854 | |
| 3855 | /** |
| 3856 | * xe_vm_lock() - Lock the vm's dma_resv object |
| 3857 | * @vm: The struct xe_vm whose lock is to be locked |
| 3858 | * @intr: Whether to perform any wait interruptible |
| 3859 | * |
| 3860 | * Return: 0 on success, -EINTR if @intr is true and the wait for a |
| 3861 | * contended lock was interrupted. If @intr is false, the function |
| 3862 | * always returns 0. |
| 3863 | */ |
| 3864 | int xe_vm_lock(struct xe_vm *vm, bool intr) |
| 3865 | { |
| 3866 | int ret; |
| 3867 | |
| 3868 | if (intr) |
| 3869 | ret = dma_resv_lock_interruptible(obj: xe_vm_resv(vm), NULL); |
| 3870 | else |
| 3871 | ret = dma_resv_lock(obj: xe_vm_resv(vm), NULL); |
| 3872 | |
| 3873 | return ret; |
| 3874 | } |
| 3875 | |
| 3876 | /** |
| 3877 | * xe_vm_unlock() - Unlock the vm's dma_resv object |
| 3878 | * @vm: The struct xe_vm whose lock is to be released. |
| 3879 | * |
| 3880 | * Unlock a buffer object lock that was locked by xe_vm_lock(). |
| 3881 | */ |
| 3882 | void xe_vm_unlock(struct xe_vm *vm) |
| 3883 | { |
| 3884 | dma_resv_unlock(obj: xe_vm_resv(vm)); |
| 3885 | } |
| 3886 | |
| 3887 | /** |
| 3888 | * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an |
| 3889 | * address range |
| 3890 | * @vm: The VM |
| 3891 | * @start: start address |
| 3892 | * @end: end address |
* @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
*
* Issue a range-based TLB invalidation for the primary and media GTs of the
* tiles selected by @tile_mask.
| 3896 | * |
| 3897 | * Returns 0 for success, negative error code otherwise. |
| 3898 | */ |
| 3899 | int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start, |
| 3900 | u64 end, u8 tile_mask) |
| 3901 | { |
| 3902 | struct xe_tlb_inval_fence |
| 3903 | fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE]; |
| 3904 | struct xe_tile *tile; |
| 3905 | u32 fence_id = 0; |
| 3906 | u8 id; |
| 3907 | int err; |
| 3908 | |
| 3909 | if (!tile_mask) |
| 3910 | return 0; |
| 3911 | |
| 3912 | for_each_tile(tile, vm->xe, id) { |
| 3913 | if (!(tile_mask & BIT(id))) |
| 3914 | continue; |
| 3915 | |
| 3916 | xe_tlb_inval_fence_init(tlb_inval: &tile->primary_gt->tlb_inval, |
| 3917 | fence: &fence[fence_id], stack: true); |
| 3918 | |
| 3919 | err = xe_tlb_inval_range(tlb_inval: &tile->primary_gt->tlb_inval, |
| 3920 | fence: &fence[fence_id], start, end, |
| 3921 | asid: vm->usm.asid); |
| 3922 | if (err) |
| 3923 | goto wait; |
| 3924 | ++fence_id; |
| 3925 | |
| 3926 | if (!tile->media_gt) |
| 3927 | continue; |
| 3928 | |
| 3929 | xe_tlb_inval_fence_init(tlb_inval: &tile->media_gt->tlb_inval, |
| 3930 | fence: &fence[fence_id], stack: true); |
| 3931 | |
| 3932 | err = xe_tlb_inval_range(tlb_inval: &tile->media_gt->tlb_inval, |
| 3933 | fence: &fence[fence_id], start, end, |
| 3934 | asid: vm->usm.asid); |
| 3935 | if (err) |
| 3936 | goto wait; |
| 3937 | ++fence_id; |
| 3938 | } |
| 3939 | |
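/*
* Wait on all invalidation fences issued so far, including on the error
* path, since the fences live on the stack.
*/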
| 3940 | wait: |
| 3941 | for (id = 0; id < fence_id; ++id) |
| 3942 | xe_tlb_inval_fence_wait(fence: &fence[id]); |
| 3943 | |
| 3944 | return err; |
| 3945 | } |
| 3946 | |
| 3947 | /** |
| 3948 | * xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock |
| 3949 | * @vma: VMA to invalidate |
| 3950 | * |
* Walks the page-table leaves, zeroing the entries owned by this VMA, issues
* TLB invalidations, and blocks until the invalidations have completed.
| 3954 | * |
| 3955 | * Returns 0 for success, negative error code otherwise. |
| 3956 | */ |
| 3957 | int xe_vm_invalidate_vma(struct xe_vma *vma) |
| 3958 | { |
| 3959 | struct xe_device *xe = xe_vma_vm(vma)->xe; |
| 3960 | struct xe_vm *vm = xe_vma_vm(vma); |
| 3961 | struct xe_tile *tile; |
| 3962 | u8 tile_mask = 0; |
| 3963 | int ret = 0; |
| 3964 | u8 id; |
| 3965 | |
| 3966 | xe_assert(xe, !xe_vma_is_null(vma)); |
| 3967 | xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma)); |
| 3968 | trace_xe_vma_invalidate(vma); |
| 3969 | |
| 3970 | vm_dbg(&vm->xe->drm, |
| 3971 | "INVALIDATE: addr=0x%016llx, range=0x%016llx" , |
| 3972 | xe_vma_start(vma), xe_vma_size(vma)); |
| 3973 | |
| 3974 | /* |
* Check that we don't race with page-table updates; the tile_invalidated
* update below is then safe.
| 3977 | */ |
| 3978 | if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { |
| 3979 | if (xe_vma_is_userptr(vma)) { |
| 3980 | lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) || |
| 3981 | (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) && |
| 3982 | lockdep_is_held(&xe_vm_resv(vm)->lock.base))); |
| 3983 | |
| 3984 | WARN_ON_ONCE(!mmu_interval_check_retry |
| 3985 | (&to_userptr_vma(vma)->userptr.notifier, |
| 3986 | to_userptr_vma(vma)->userptr.pages.notifier_seq)); |
| 3987 | WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm), |
| 3988 | DMA_RESV_USAGE_BOOKKEEP)); |
| 3989 | |
| 3990 | } else { |
| 3991 | xe_bo_assert_held(bo: xe_vma_bo(vma)); |
| 3992 | } |
| 3993 | } |
| 3994 | |
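/* Zap this VMA's PTEs on each tile, tracking which tiles need a TLB invalidation. */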
| 3995 | for_each_tile(tile, xe, id) |
| 3996 | if (xe_pt_zap_ptes(tile, vma)) |
| 3997 | tile_mask |= BIT(id); |
| 3998 | |
| 3999 | xe_device_wmb(xe); |
| 4000 | |
| 4001 | ret = xe_vm_range_tilemask_tlb_inval(vm: xe_vma_vm(vma), start: xe_vma_start(vma), |
| 4002 | end: xe_vma_end(vma), tile_mask); |
| 4003 | |
| 4004 | /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */ |
| 4005 | WRITE_ONCE(vma->tile_invalidated, vma->tile_mask); |
| 4006 | |
| 4007 | return ret; |
| 4008 | } |
| 4009 | |
| 4010 | int xe_vm_validate_protected(struct xe_vm *vm) |
| 4011 | { |
| 4012 | struct drm_gpuva *gpuva; |
| 4013 | int err = 0; |
| 4014 | |
| 4015 | if (!vm) |
| 4016 | return -ENODEV; |
| 4017 | |
| 4018 | mutex_lock(&vm->snap_mutex); |
| 4019 | |
| 4020 | drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { |
| 4021 | struct xe_vma *vma = gpuva_to_vma(gpuva); |
| 4022 | struct xe_bo *bo = vma->gpuva.gem.obj ? |
| 4023 | gem_to_xe_bo(obj: vma->gpuva.gem.obj) : NULL; |
| 4024 | |
| 4025 | if (!bo) |
| 4026 | continue; |
| 4027 | |
| 4028 | if (xe_bo_is_protected(bo)) { |
| 4029 | err = xe_pxp_bo_key_check(pxp: vm->xe->pxp, bo); |
| 4030 | if (err) |
| 4031 | break; |
| 4032 | } |
| 4033 | } |
| 4034 | |
| 4035 | mutex_unlock(lock: &vm->snap_mutex); |
| 4036 | return err; |
| 4037 | } |
| 4038 | |
| 4039 | struct xe_vm_snapshot { |
| 4040 | unsigned long num_snaps; |
| 4041 | struct { |
| 4042 | u64 ofs, bo_ofs; |
| 4043 | unsigned long len; |
| 4044 | struct xe_bo *bo; |
| 4045 | void *data; |
| 4046 | struct mm_struct *mm; |
| 4047 | } snap[]; |
| 4048 | }; |
| 4049 | |
| 4050 | struct xe_vm_snapshot *xe_vm_snapshot_capture(struct xe_vm *vm) |
| 4051 | { |
| 4052 | unsigned long num_snaps = 0, i; |
| 4053 | struct xe_vm_snapshot *snap = NULL; |
| 4054 | struct drm_gpuva *gpuva; |
| 4055 | |
| 4056 | if (!vm) |
| 4057 | return NULL; |
| 4058 | |
| 4059 | mutex_lock(&vm->snap_mutex); |
| 4060 | drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { |
| 4061 | if (gpuva->flags & XE_VMA_DUMPABLE) |
| 4062 | num_snaps++; |
| 4063 | } |
| 4064 | |
| 4065 | if (num_snaps) |
| 4066 | snap = kvzalloc(offsetof(struct xe_vm_snapshot, snap[num_snaps]), GFP_NOWAIT); |
| 4067 | if (!snap) { |
| 4068 | snap = num_snaps ? ERR_PTR(error: -ENOMEM) : ERR_PTR(error: -ENODEV); |
| 4069 | goto out_unlock; |
| 4070 | } |
| 4071 | |
| 4072 | snap->num_snaps = num_snaps; |
| 4073 | i = 0; |
| 4074 | drm_gpuvm_for_each_va(gpuva, &vm->gpuvm) { |
| 4075 | struct xe_vma *vma = gpuva_to_vma(gpuva); |
| 4076 | struct xe_bo *bo = vma->gpuva.gem.obj ? |
| 4077 | gem_to_xe_bo(obj: vma->gpuva.gem.obj) : NULL; |
| 4078 | |
| 4079 | if (!(gpuva->flags & XE_VMA_DUMPABLE)) |
| 4080 | continue; |
| 4081 | |
| 4082 | snap->snap[i].ofs = xe_vma_start(vma); |
| 4083 | snap->snap[i].len = xe_vma_size(vma); |
| 4084 | if (bo) { |
| 4085 | snap->snap[i].bo = xe_bo_get(bo); |
| 4086 | snap->snap[i].bo_ofs = xe_vma_bo_offset(vma); |
| 4087 | } else if (xe_vma_is_userptr(vma)) { |
| 4088 | struct mm_struct *mm = |
| 4089 | to_userptr_vma(vma)->userptr.notifier.mm; |
| 4090 | |
| 4091 | if (mmget_not_zero(mm)) |
| 4092 | snap->snap[i].mm = mm; |
| 4093 | else |
| 4094 | snap->snap[i].data = ERR_PTR(error: -EFAULT); |
| 4095 | |
| 4096 | snap->snap[i].bo_ofs = xe_vma_userptr(vma); |
| 4097 | } else { |
| 4098 | snap->snap[i].data = ERR_PTR(error: -ENOENT); |
| 4099 | } |
| 4100 | i++; |
| 4101 | } |
| 4102 | |
| 4103 | out_unlock: |
| 4104 | mutex_unlock(lock: &vm->snap_mutex); |
| 4105 | return snap; |
| 4106 | } |
| 4107 | |
| 4108 | void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap) |
| 4109 | { |
| 4110 | if (IS_ERR_OR_NULL(ptr: snap)) |
| 4111 | return; |
| 4112 | |
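/*
* Now that we may sleep, copy the contents of each captured range, either
* from the backing BO or from the userptr's mm.
*/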
| 4113 | for (int i = 0; i < snap->num_snaps; i++) { |
| 4114 | struct xe_bo *bo = snap->snap[i].bo; |
| 4115 | int err; |
| 4116 | |
| 4117 | if (IS_ERR(ptr: snap->snap[i].data)) |
| 4118 | continue; |
| 4119 | |
| 4120 | snap->snap[i].data = kvmalloc(snap->snap[i].len, GFP_USER); |
| 4121 | if (!snap->snap[i].data) { |
| 4122 | snap->snap[i].data = ERR_PTR(error: -ENOMEM); |
| 4123 | goto cleanup_bo; |
| 4124 | } |
| 4125 | |
| 4126 | if (bo) { |
| 4127 | err = xe_bo_read(bo, offset: snap->snap[i].bo_ofs, |
| 4128 | dst: snap->snap[i].data, size: snap->snap[i].len); |
| 4129 | } else { |
| 4130 | void __user *userptr = (void __user *)(size_t)snap->snap[i].bo_ofs; |
| 4131 | |
| 4132 | kthread_use_mm(mm: snap->snap[i].mm); |
| 4133 | if (!copy_from_user(to: snap->snap[i].data, from: userptr, n: snap->snap[i].len)) |
| 4134 | err = 0; |
| 4135 | else |
| 4136 | err = -EFAULT; |
| 4137 | kthread_unuse_mm(mm: snap->snap[i].mm); |
| 4138 | |
| 4139 | mmput(snap->snap[i].mm); |
| 4140 | snap->snap[i].mm = NULL; |
| 4141 | } |
| 4142 | |
| 4143 | if (err) { |
| 4144 | kvfree(addr: snap->snap[i].data); |
| 4145 | snap->snap[i].data = ERR_PTR(error: err); |
| 4146 | } |
| 4147 | |
| 4148 | cleanup_bo: |
| 4149 | xe_bo_put(bo); |
| 4150 | snap->snap[i].bo = NULL; |
| 4151 | } |
| 4152 | } |
| 4153 | |
| 4154 | void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p) |
| 4155 | { |
| 4156 | unsigned long i, j; |
| 4157 | |
| 4158 | if (IS_ERR_OR_NULL(ptr: snap)) { |
| 4159 | drm_printf(p, f: "[0].error: %li\n" , PTR_ERR(ptr: snap)); |
| 4160 | return; |
| 4161 | } |
| 4162 | |
| 4163 | for (i = 0; i < snap->num_snaps; i++) { |
| 4164 | drm_printf(p, f: "[%llx].length: 0x%lx\n" , snap->snap[i].ofs, snap->snap[i].len); |
| 4165 | |
| 4166 | if (IS_ERR(ptr: snap->snap[i].data)) { |
| 4167 | drm_printf(p, f: "[%llx].error: %li\n" , snap->snap[i].ofs, |
| 4168 | PTR_ERR(ptr: snap->snap[i].data)); |
| 4169 | continue; |
| 4170 | } |
| 4171 | |
| 4172 | drm_printf(p, f: "[%llx].data: " , snap->snap[i].ofs); |
| 4173 | |
| 4174 | for (j = 0; j < snap->snap[i].len; j += sizeof(u32)) { |
| 4175 | u32 *val = snap->snap[i].data + j; |
| 4176 | char dumped[ASCII85_BUFSZ]; |
| 4177 | |
| 4178 | drm_puts(p, str: ascii85_encode(in: *val, out: dumped)); |
| 4179 | } |
| 4180 | |
drm_puts(p, str: "\n");
| 4182 | |
| 4183 | if (drm_coredump_printer_is_full(p)) |
| 4184 | return; |
| 4185 | } |
| 4186 | } |
| 4187 | |
| 4188 | void xe_vm_snapshot_free(struct xe_vm_snapshot *snap) |
| 4189 | { |
| 4190 | unsigned long i; |
| 4191 | |
| 4192 | if (IS_ERR_OR_NULL(ptr: snap)) |
| 4193 | return; |
| 4194 | |
| 4195 | for (i = 0; i < snap->num_snaps; i++) { |
| 4196 | if (!IS_ERR(ptr: snap->snap[i].data)) |
| 4197 | kvfree(addr: snap->snap[i].data); |
| 4198 | xe_bo_put(bo: snap->snap[i].bo); |
| 4199 | if (snap->snap[i].mm) |
| 4200 | mmput(snap->snap[i].mm); |
| 4201 | } |
| 4202 | kvfree(addr: snap); |
| 4203 | } |
| 4204 | |
| 4205 | /** |
| 4206 | * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations |
| 4207 | * @xe: Pointer to the Xe device structure |
| 4208 | * @vma: Pointer to the virtual memory area (VMA) structure |
* @is_atomic: True when called from the pagefault path for an atomic access
*
* This function determines whether the given VMA needs to be migrated to
* VRAM in order to perform atomic GPU operations.
| 4213 | * |
| 4214 | * Return: |
| 4215 | * 1 - Migration to VRAM is required |
| 4216 | * 0 - Migration is not required |
| 4217 | * -EACCES - Invalid access for atomic memory attr |
| 4218 | * |
| 4219 | */ |
| 4220 | int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic) |
| 4221 | { |
| 4222 | u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access : |
| 4223 | vma->attr.atomic_access; |
| 4224 | |
| 4225 | if (!IS_DGFX(xe) || !is_atomic) |
| 4226 | return false; |
| 4227 | |
| 4228 | /* |
| 4229 | * NOTE: The checks implemented here are platform-specific. For |
| 4230 | * instance, on a device supporting CXL atomics, these would ideally |
| 4231 | * work universally without additional handling. |
| 4232 | */ |
| 4233 | switch (atomic_access) { |
| 4234 | case DRM_XE_ATOMIC_DEVICE: |
| 4235 | return !xe->info.has_device_atomics_on_smem; |
| 4236 | |
| 4237 | case DRM_XE_ATOMIC_CPU: |
| 4238 | return -EACCES; |
| 4239 | |
| 4240 | case DRM_XE_ATOMIC_UNDEFINED: |
| 4241 | case DRM_XE_ATOMIC_GLOBAL: |
| 4242 | default: |
| 4243 | return 1; |
| 4244 | } |
| 4245 | } |
| 4246 | |
| 4247 | static int xe_vm_alloc_vma(struct xe_vm *vm, |
| 4248 | struct drm_gpuvm_map_req *map_req, |
| 4249 | bool is_madvise) |
| 4250 | { |
| 4251 | struct xe_vma_ops vops; |
| 4252 | struct drm_gpuva_ops *ops = NULL; |
| 4253 | struct drm_gpuva_op *__op; |
| 4254 | unsigned int vma_flags = 0; |
| 4255 | bool remap_op = false; |
| 4256 | struct xe_vma_mem_attr tmp_attr; |
| 4257 | u16 default_pat; |
| 4258 | int err; |
| 4259 | |
| 4260 | lockdep_assert_held_write(&vm->lock); |
| 4261 | |
| 4262 | if (is_madvise) |
| 4263 | ops = drm_gpuvm_madvise_ops_create(gpuvm: &vm->gpuvm, req: map_req); |
| 4264 | else |
| 4265 | ops = drm_gpuvm_sm_map_ops_create(gpuvm: &vm->gpuvm, req: map_req); |
| 4266 | |
| 4267 | if (IS_ERR(ptr: ops)) |
| 4268 | return PTR_ERR(ptr: ops); |
| 4269 | |
| 4270 | if (list_empty(head: &ops->list)) { |
| 4271 | err = 0; |
| 4272 | goto free_ops; |
| 4273 | } |
| 4274 | |
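/*
* Propagate the creation flags, and for the non-madvise case the default
* PAT index, from the VMAs being unmapped or remapped to the new MAP ops.
*/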
| 4275 | drm_gpuva_for_each_op(__op, ops) { |
| 4276 | struct xe_vma_op *op = gpuva_op_to_vma_op(op: __op); |
| 4277 | struct xe_vma *vma = NULL; |
| 4278 | |
| 4279 | if (!is_madvise) { |
| 4280 | if (__op->op == DRM_GPUVA_OP_UNMAP) { |
| 4281 | vma = gpuva_to_vma(gpuva: op->base.unmap.va); |
| 4282 | XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma)); |
| 4283 | default_pat = vma->attr.default_pat_index; |
| 4284 | vma_flags = vma->gpuva.flags; |
| 4285 | } |
| 4286 | |
| 4287 | if (__op->op == DRM_GPUVA_OP_REMAP) { |
| 4288 | vma = gpuva_to_vma(gpuva: op->base.remap.unmap->va); |
| 4289 | default_pat = vma->attr.default_pat_index; |
| 4290 | vma_flags = vma->gpuva.flags; |
| 4291 | } |
| 4292 | |
| 4293 | if (__op->op == DRM_GPUVA_OP_MAP) { |
| 4294 | op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; |
| 4295 | op->map.pat_index = default_pat; |
| 4296 | } |
| 4297 | } else { |
| 4298 | if (__op->op == DRM_GPUVA_OP_REMAP) { |
| 4299 | vma = gpuva_to_vma(gpuva: op->base.remap.unmap->va); |
| 4300 | xe_assert(vm->xe, !remap_op); |
| 4301 | xe_assert(vm->xe, xe_vma_has_no_bo(vma)); |
| 4302 | remap_op = true; |
| 4303 | vma_flags = vma->gpuva.flags; |
| 4304 | } |
| 4305 | |
| 4306 | if (__op->op == DRM_GPUVA_OP_MAP) { |
| 4307 | xe_assert(vm->xe, remap_op); |
| 4308 | remap_op = false; |
| 4309 | /* |
| 4310 | * In case of madvise ops DRM_GPUVA_OP_MAP is |
* always after DRM_GPUVA_OP_REMAP, so make sure to propagate the
* flags from the vma we're unmapping.
| 4314 | */ |
| 4315 | op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK; |
| 4316 | } |
| 4317 | } |
| 4318 | print_op(xe: vm->xe, op: __op); |
| 4319 | } |
| 4320 | |
| 4321 | xe_vma_ops_init(vops: &vops, vm, NULL, NULL, num_syncs: 0); |
| 4322 | |
| 4323 | if (is_madvise) |
| 4324 | vops.flags |= XE_VMA_OPS_FLAG_MADVISE; |
| 4325 | |
| 4326 | err = vm_bind_ioctl_ops_parse(vm, ops, vops: &vops); |
| 4327 | if (err) |
| 4328 | goto unwind_ops; |
| 4329 | |
| 4330 | xe_vm_lock(vm, intr: false); |
| 4331 | |
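/*
* Commit the ops: destroy the VMAs torn down by UNMAP/REMAP and, for
* madvise, transfer the old VMA's memory attributes to the new MAP VMA.
*/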
| 4332 | drm_gpuva_for_each_op(__op, ops) { |
| 4333 | struct xe_vma_op *op = gpuva_op_to_vma_op(op: __op); |
| 4334 | struct xe_vma *vma; |
| 4335 | |
| 4336 | if (__op->op == DRM_GPUVA_OP_UNMAP) { |
| 4337 | vma = gpuva_to_vma(gpuva: op->base.unmap.va); |
| 4338 | /* There should be no unmap for madvise */ |
| 4339 | if (is_madvise) |
| 4340 | XE_WARN_ON("UNEXPECTED UNMAP" ); |
| 4341 | |
| 4342 | xe_vma_destroy(vma, NULL); |
| 4343 | } else if (__op->op == DRM_GPUVA_OP_REMAP) { |
| 4344 | vma = gpuva_to_vma(gpuva: op->base.remap.unmap->va); |
/* For madvise ops, store the attributes of the VMA being unmapped
* by the REMAP so they can be assigned to the newly created MAP vma.
*/
| 4348 | if (is_madvise) |
| 4349 | tmp_attr = vma->attr; |
| 4350 | |
| 4351 | xe_vma_destroy(vma: gpuva_to_vma(gpuva: op->base.remap.unmap->va), NULL); |
| 4352 | } else if (__op->op == DRM_GPUVA_OP_MAP) { |
| 4353 | vma = op->map.vma; |
/* For a madvise call, a MAP op always comes after a REMAP op, so
* tmp_attr always holds sane values, making it safe to copy them to
* the new vma.
*/
| 4358 | if (is_madvise) |
| 4359 | vma->attr = tmp_attr; |
| 4360 | } |
| 4361 | } |
| 4362 | |
| 4363 | xe_vm_unlock(vm); |
| 4364 | drm_gpuva_ops_free(gpuvm: &vm->gpuvm, ops); |
| 4365 | return 0; |
| 4366 | |
| 4367 | unwind_ops: |
| 4368 | vm_bind_ioctl_ops_unwind(vm, ops: &ops, num_ops_list: 1); |
| 4369 | free_ops: |
| 4370 | drm_gpuva_ops_free(gpuvm: &vm->gpuvm, ops); |
| 4371 | return err; |
| 4372 | } |
| 4373 | |
| 4374 | /** |
| 4375 | * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops |
| 4376 | * @vm: Pointer to the xe_vm structure |
| 4377 | * @start: Starting input address |
| 4378 | * @range: Size of the input range |
| 4379 | * |
* This function splits existing VMAs to create new VMAs covering the
* user-provided input range.
*
* Return: 0 on success, negative error code on failure.
| 4383 | */ |
| 4384 | int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range) |
| 4385 | { |
| 4386 | struct drm_gpuvm_map_req map_req = { |
| 4387 | .map.va.addr = start, |
| 4388 | .map.va.range = range, |
| 4389 | }; |
| 4390 | |
| 4391 | lockdep_assert_held_write(&vm->lock); |
| 4392 | |
| 4393 | vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx" , start, range); |
| 4394 | |
| 4395 | return xe_vm_alloc_vma(vm, map_req: &map_req, is_madvise: true); |
| 4396 | } |
| 4397 | |
| 4398 | /** |
| 4399 | * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma |
| 4400 | * @vm: Pointer to the xe_vm structure |
| 4401 | * @start: Starting input address |
| 4402 | * @range: Size of the input range |
| 4403 | * |
* This function splits/merges existing VMAs to create a new VMA covering
* the user-provided input range.
*
* Return: 0 on success, negative error code on failure.
| 4407 | */ |
| 4408 | int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range) |
| 4409 | { |
| 4410 | struct drm_gpuvm_map_req map_req = { |
| 4411 | .map.va.addr = start, |
| 4412 | .map.va.range = range, |
| 4413 | }; |
| 4414 | |
| 4415 | lockdep_assert_held_write(&vm->lock); |
| 4416 | |
| 4417 | vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx" , |
| 4418 | start, range); |
| 4419 | |
| 4420 | return xe_vm_alloc_vma(vm, map_req: &map_req, is_madvise: false); |
| 4421 | } |
| 4422 | |