intel_ggtt.c source code [linux/drivers/gpu/drm/i915/gt/intel_ggtt.c]

1	// SPDX-License-Identifier: MIT
2	/*
3	* Copyright © 2020 Intel Corporation
4	*/
5
6	#include <asm/set_memory.h>
7	#include <asm/smp.h>
8	#include <linux/types.h>
9	#include <linux/stop_machine.h>
10
11	#include <drm/drm_managed.h>
12	#include <drm/drm_print.h>
13	#include <drm/intel/i915_drm.h>
14	#include <drm/intel/intel-gtt.h>
15
16	#include "gem/i915_gem_lmem.h"
17
18	#include "intel_context.h"
19	#include "intel_ggtt_gmch.h"
20	#include "intel_gpu_commands.h"
21	#include "intel_gt.h"
22	#include "intel_gt_regs.h"
23	#include "intel_pci_config.h"
24	#include "intel_ring.h"
25	#include "i915_drv.h"
26	#include "i915_pci.h"
27	#include "i915_reg.h"
28	#include "i915_request.h"
29	#include "i915_scatterlist.h"
30	#include "i915_utils.h"
31	#include "i915_vgpu.h"
32
33	#include "intel_gtt.h"
34	#include "gen8_ppgtt.h"
35	#include "intel_engine_pm.h"
36
37	static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
38	unsigned long color,
39	u64 *start,
40	u64 *end)
41	{
42	if (i915_node_color_differs(node, color))
43	*start += I915_GTT_PAGE_SIZE;
44
45	/*
46	* Also leave a space between the unallocated reserved node after the
47	* GTT and any objects within the GTT, i.e. we use the color adjustment
48	* to insert a guard page to prevent prefetches crossing over the
49	* GTT boundary.
50	*/
51	node = list_next_entry(node, node_list);
52	if (node->color != color)
53	*end -= I915_GTT_PAGE_SIZE;
54	}
55
56	static int ggtt_init_hw(struct i915_ggtt *ggtt)
57	{
58	struct drm_i915_private *i915 = ggtt->vm.i915;
59
60	i915_address_space_init(vm: &ggtt->vm, VM_CLASS_GGTT);
61
62	ggtt->vm.is_ggtt = true;
63
64	/ Only VLV supports read-only GGTT mappings /
65	ggtt->vm.has_read_only = IS_VALLEYVIEW(i915);
66
67	if (!HAS_LLC(i915) && !HAS_PPGTT(i915))
68	ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust;
69
70	if (ggtt->mappable_end) {
71	if (!io_mapping_init_wc(iomap: &ggtt->iomap,
72	base: ggtt->gmadr.start,
73	size: ggtt->mappable_end)) {
74	ggtt->vm.cleanup(&ggtt->vm);
75	return -EIO;
76	}
77
78	ggtt->mtrr = arch_phys_wc_add(base: ggtt->gmadr.start,
79	size: ggtt->mappable_end);
80	}
81
82	intel_ggtt_init_fences(ggtt);
83
84	return `0`;
85	}
86
87	/**
88	* i915_ggtt_init_hw - Initialize GGTT hardware
89	* @i915: i915 device
90	*/
91	int i915_ggtt_init_hw(struct drm_i915_private *i915)
92	{
93	int ret;
94
95	/*
96	* Note that we use page colouring to enforce a guard page at the
97	* end of the address space. This is required as the CS may prefetch
98	* beyond the end of the batch buffer, across the page boundary,
99	* and beyond the end of the GTT if we do not provide a guard.
100	*/
101	ret = ggtt_init_hw(ggtt: to_gt(i915)->ggtt);
102	if (ret)
103	return ret;
104
105	return `0`;
106	}
107
108	/**
109	* i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM
110	* @vm: The VM to suspend the mappings for
111	* @evict_all: Evict all VMAs
112	*
113	* Suspend the memory mappings for all objects mapped to HW via the GGTT or a
114	* DPT page table.
115	*/
116	void i915_ggtt_suspend_vm(struct i915_address_space *vm, bool evict_all)
117	{
118	struct i915_vma vma, vn;
119	int save_skip_rewrite;
120
121	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
122
123	retry:
124	i915_gem_drain_freed_objects(i915: vm->i915);
125
126	mutex_lock(&vm->mutex);
127
128	/*
129	* Skip rewriting PTE on VMA unbind.
130	* FIXME: Use an argument to i915_vma_unbind() instead?
131	*/
132	save_skip_rewrite = vm->skip_pte_rewrite;
133	vm->skip_pte_rewrite = true;
134
135	list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) {
136	struct drm_i915_gem_object *obj = vma->obj;
137
138	GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
139
140	if (i915_vma_is_pinned(vma) \|\| !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND))
141	continue;
142
143	/ unlikely to race when GPU is idle, so no worry about slowpath.. /
144	if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) {
145	/*
146	* No dead objects should appear here, GPU should be
147	* completely idle, and userspace suspended
148	*/
149	i915_gem_object_get(obj);
150
151	mutex_unlock(lock: &vm->mutex);
152
153	i915_gem_object_lock(obj, NULL);
154	GEM_WARN_ON(i915_vma_unbind(vma));
155	i915_gem_object_unlock(obj);
156	i915_gem_object_put(obj);
157
158	vm->skip_pte_rewrite = save_skip_rewrite;
159	goto retry;
160	}
161
162	if (evict_all \|\| !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
163	i915_vma_wait_for_bind(vma);
164
165	__i915_vma_evict(vma, async: false);
166	drm_mm_remove_node(node: &vma->node);
167	}
168
169	i915_gem_object_unlock(obj);
170	}
171
172	vm->clear_range(vm, `0`, vm->total);
173
174	vm->skip_pte_rewrite = save_skip_rewrite;
175
176	mutex_unlock(lock: &vm->mutex);
177
178	drm_WARN_ON(&vm->i915->drm, evict_all && !list_empty(&vm->bound_list));
179	}
180
181	void i915_ggtt_suspend(struct i915_ggtt *ggtt)
182	{
183	struct intel_gt *gt;
184
185	i915_ggtt_suspend_vm(vm: &ggtt->vm, evict_all: false);
186	ggtt->invalidate(ggtt);
187
188	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
189	intel_gt_check_and_clear_faults(gt);
190	}
191
192	void gen6_ggtt_invalidate(struct i915_ggtt *ggtt)
193	{
194	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
195
196	spin_lock_irq(lock: &uncore->lock);
197	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
198	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
199	spin_unlock_irq(lock: &uncore->lock);
200	}
201
202	static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915)
203	{
204	/*
205	* On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range
206	* will be dropped. For WC mappings in general we have 64 byte burst
207	* writes when the WC buffer is flushed, so we can't use it, but have to
208	* resort to an uncached mapping. The WC issue is easily caught by the
209	* readback check when writing GTT PTE entries.
210	*/
211	if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < `11`)
212	return true;
213
214	return false;
215	}
216
217	static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt)
218	{
219	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
220
221	/*
222	* Note that as an uncached mmio write, this will flush the
223	* WCB of the writes into the GGTT before it triggers the invalidate.
224	*
225	* Only perform this when GGTT is mapped as WC, see ggtt_probe_common().
226	*/
227	if (needs_wc_ggtt_mapping(i915: ggtt->vm.i915))
228	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6,
229	GFX_FLSH_CNTL_EN);
230	}
231
232	static void guc_ggtt_ct_invalidate(struct intel_gt *gt)
233	{
234	struct intel_uncore *uncore = gt->uncore;
235	intel_wakeref_t wakeref;
236
237	with_intel_runtime_pm_if_active(uncore->rpm, wakeref)
238	intel_guc_invalidate_tlb_guc(guc: gt_to_guc(gt));
239	}
240
241	static void guc_ggtt_invalidate(struct i915_ggtt *ggtt)
242	{
243	struct drm_i915_private *i915 = ggtt->vm.i915;
244	struct intel_gt *gt;
245
246	gen8_ggtt_invalidate(ggtt);
247
248	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) {
249	if (intel_guc_tlb_invalidation_is_available(guc: gt_to_guc(gt)))
250	guc_ggtt_ct_invalidate(gt);
251	else if (GRAPHICS_VER(i915) >= `12`)
252	intel_uncore_write_fw(gt->uncore,
253	GEN12_GUC_TLB_INV_CR,
254	GEN12_GUC_TLB_INV_CR_INVALIDATE);
255	else
256	intel_uncore_write_fw(gt->uncore,
257	GEN8_GTCR, GEN8_GTCR_INVALIDATE);
258	}
259	}
260
261	static u64 mtl_ggtt_pte_encode(dma_addr_t addr,
262	unsigned int pat_index,
263	u32 flags)
264	{
265	gen8_pte_t pte = addr \| GEN8_PAGE_PRESENT;
266
267	WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK);
268
269	if (flags & PTE_LM)
270	pte \|= GEN12_GGTT_PTE_LM;
271
272	if (pat_index & BIT(`0`))
273	pte \|= MTL_GGTT_PTE_PAT0;
274
275	if (pat_index & BIT(`1`))
276	pte \|= MTL_GGTT_PTE_PAT1;
277
278	return pte;
279	}
280
281	u64 gen8_ggtt_pte_encode(dma_addr_t addr,
282	unsigned int pat_index,
283	u32 flags)
284	{
285	gen8_pte_t pte = addr \| GEN8_PAGE_PRESENT;
286
287	if (flags & PTE_LM)
288	pte \|= GEN12_GGTT_PTE_LM;
289
290	return pte;
291	}
292
293	static dma_addr_t gen8_ggtt_pte_decode(u64 pte, bool is_present, bool is_local)
294	{
295	*is_present = pte & GEN8_PAGE_PRESENT;
296	*is_local = pte & GEN12_GGTT_PTE_LM;
297
298	return pte & GEN12_GGTT_PTE_ADDR_MASK;
299	}
300
301	static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
302	{
303	struct intel_gt *gt = ggtt->vm.gt;
304
305	return intel_gt_is_bind_context_ready(gt);
306	}
307
308	static struct intel_context gen8_ggtt_bind_get_ce(struct* i915_ggtt ggtt, intel_wakeref_t wakeref)
309	{
310	struct intel_context *ce;
311	struct intel_gt *gt = ggtt->vm.gt;
312
313	if (intel_gt_is_wedged(gt))
314	return NULL;
315
316	ce = gt->engine[BCS0]->bind_context;
317	GEM_BUG_ON(!ce);
318
319	/*
320	* If the GT is not awake already at this stage then fallback
321	* to pci based GGTT update otherwise __intel_wakeref_get_first()
322	* would conflict with fs_reclaim trying to allocate memory while
323	* doing rpm_resume().
324	*/
325	*wakeref = intel_gt_pm_get_if_awake(gt);
326	if (!*wakeref)
327	return NULL;
328
329	intel_engine_pm_get(engine: ce->engine);
330
331	return ce;
332	}
333
334	static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref)
335	{
336	intel_engine_pm_put(engine: ce->engine);
337	intel_gt_pm_put(gt: ce->engine->gt, handle: wakeref);
338	}
339
340	static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
341	struct sg_table *pages, u32 num_entries,
342	const gen8_pte_t pte)
343	{
344	struct i915_sched_attr attr = {};
345	struct intel_gt *gt = ggtt->vm.gt;
346	const gen8_pte_t scratch_pte = ggtt->vm.scratch[`0`]->encode;
347	struct sgt_iter iter;
348	struct i915_request *rq;
349	struct intel_context *ce;
350	intel_wakeref_t wakeref;
351	u32 *cs;
352
353	if (!num_entries)
354	return true;
355
356	ce = gen8_ggtt_bind_get_ce(ggtt, wakeref: &wakeref);
357	if (!ce)
358	return false;
359
360	if (pages)
361	iter = __sgt_iter(sgl: pages->sgl, dma: true);
362
363	while (num_entries) {
364	int count = `0`;
365	dma_addr_t addr;
366	/*
367	* MI_UPDATE_GTT can update 512 entries in a single command but
368	* that end up with engine reset, 511 works.
369	*/
370	u32 n_ptes = min_t(u32, `511`, num_entries);
371
372	if (mutex_lock_interruptible(&ce->timeline->mutex))
373	goto put_ce;
374
375	intel_context_enter(ce);
376	rq = __i915_request_create(ce, GFP_NOWAIT \| GFP_ATOMIC);
377	intel_context_exit(ce);
378	if (IS_ERR(ptr: rq)) {
379	GT_TRACE(gt, "Failed to get bind request\n");
380	mutex_unlock(lock: &ce->timeline->mutex);
381	goto put_ce;
382	}
383
384	cs = intel_ring_begin(rq, num_dwords: `2` * n_ptes + `2`);
385	if (IS_ERR(ptr: cs)) {
386	GT_TRACE(gt, "Failed to ring space for GGTT bind\n");
387	i915_request_set_error_once(rq, error: PTR_ERR(ptr: cs));
388	/ once a request is created, it must be queued /
389	goto queue_err_rq;
390	}
391
392	cs++ = MI_UPDATE_GTT \| (`2` n_ptes);
393	*cs++ = offset << `12`;
394
395	if (pages) {
396	for_each_sgt_daddr_next(addr, iter) {
397	if (count == n_ptes)
398	break;
399	*cs++ = lower_32_bits(pte \| addr);
400	*cs++ = upper_32_bits(pte \| addr);
401	count++;
402	}
403	/ fill remaining with scratch pte, if any /
404	if (count < n_ptes) {
405	memset64(s: (u64 *)cs, v: scratch_pte,
406	n: n_ptes - count);
407	cs += (n_ptes - count) * `2`;
408	}
409	} else {
410	memset64(s: (u64 *)cs, v: pte, n: n_ptes);
411	cs += n_ptes * `2`;
412	}
413
414	intel_ring_advance(rq, cs);
415	queue_err_rq:
416	i915_request_get(rq);
417	__i915_request_commit(request: rq);
418	__i915_request_queue(rq, attr: &attr);
419
420	mutex_unlock(lock: &ce->timeline->mutex);
421	/ This will break if the request is complete or after engine reset /
422	i915_request_wait(rq, flags: `0`, MAX_SCHEDULE_TIMEOUT);
423	if (rq->fence.error)
424	goto err_rq;
425
426	i915_request_put(rq);
427
428	num_entries -= n_ptes;
429	offset += n_ptes;
430	}
431
432	gen8_ggtt_bind_put_ce(ce, wakeref);
433	return true;
434
435	err_rq:
436	i915_request_put(rq);
437	put_ce:
438	gen8_ggtt_bind_put_ce(ce, wakeref);
439	return false;
440	}
441
442	static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
443	{
444	writeq(val: pte, addr);
445	}
446
447	static gen8_pte_t gen8_get_pte(void __iomem *addr)
448	{
449	return readq(addr);
450	}
451
452	static void gen8_ggtt_insert_page(struct i915_address_space *vm,
453	dma_addr_t addr,
454	u64 offset,
455	unsigned int pat_index,
456	u32 flags)
457	{
458	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
459	gen8_pte_t __iomem *pte =
460	(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
461
462	gen8_set_pte(addr: pte, pte: ggtt->vm.pte_encode(addr, pat_index, flags));
463
464	ggtt->invalidate(ggtt);
465	}
466
467	static dma_addr_t gen8_ggtt_read_entry(struct i915_address_space *vm,
468	u64 offset, bool is_present, bool is_local)
469	{
470	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
471	gen8_pte_t __iomem *pte =
472	(gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
473
474	return ggtt->vm.pte_decode(gen8_get_pte(addr: pte), is_present, is_local);
475	}
476
477	static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
478	dma_addr_t addr, u64 offset,
479	unsigned int pat_index, u32 flags)
480	{
481	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
482	gen8_pte_t pte;
483
484	pte = ggtt->vm.pte_encode(addr, pat_index, flags);
485	if (should_update_ggtt_with_bind(ggtt: i915_vm_to_ggtt(vm)) &&
486	gen8_ggtt_bind_ptes(ggtt, offset, NULL, num_entries: `1`, pte))
487	return ggtt->invalidate(ggtt);
488
489	gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
490	}
491
492	static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
493	struct i915_vma_resource *vma_res,
494	unsigned int pat_index,
495	u32 flags)
496	{
497	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
498	const gen8_pte_t pte_encode = ggtt->vm.pte_encode(`0`, pat_index, flags);
499	gen8_pte_t __iomem *gte;
500	gen8_pte_t __iomem *end;
501	struct sgt_iter iter;
502	dma_addr_t addr;
503
504	/*
505	* Note that we ignore PTE_READ_ONLY here. The caller must be careful
506	* not to allow the user to override access to a read only page.
507	*/
508
509	gte = (gen8_pte_t __iomem *)ggtt->gsm;
510	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
511	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
512	while (gte < end)
513	gen8_set_pte(addr: gte++, pte: vm->scratch[`0`]->encode);
514	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
515
516	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
517	gen8_set_pte(addr: gte++, pte: pte_encode \| addr);
518	GEM_BUG_ON(gte > end);
519
520	/ Fill the allocated but "unused" space beyond the end of the buffer /
521	while (gte < end)
522	gen8_set_pte(addr: gte++, pte: vm->scratch[`0`]->encode);
523
524	/*
525	* We want to flush the TLBs only after we're certain all the PTE
526	* updates have finished.
527	*/
528	ggtt->invalidate(ggtt);
529	}
530
531	static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
532	struct i915_vma_resource *vma_res,
533	unsigned int pat_index, u32 flags)
534	{
535	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
536	gen8_pte_t scratch_pte = vm->scratch[`0`]->encode;
537	gen8_pte_t pte_encode;
538	u64 start, end;
539
540	pte_encode = ggtt->vm.pte_encode(`0`, pat_index, flags);
541	start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
542	end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
543	if (!gen8_ggtt_bind_ptes(ggtt, offset: start, NULL, num_entries: end - start, pte: scratch_pte))
544	goto err;
545
546	start = end;
547	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
548	if (!gen8_ggtt_bind_ptes(ggtt, offset: start, pages: vma_res->bi.pages,
549	num_entries: vma_res->node_size / I915_GTT_PAGE_SIZE, pte: pte_encode))
550	goto err;
551
552	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
553	if (!gen8_ggtt_bind_ptes(ggtt, offset: start, NULL, num_entries: end - start, pte: scratch_pte))
554	goto err;
555
556	return true;
557
558	err:
559	return false;
560	}
561
562	static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
563	struct i915_vma_resource *vma_res,
564	unsigned int pat_index, u32 flags)
565	{
566	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
567
568	if (should_update_ggtt_with_bind(ggtt: i915_vm_to_ggtt(vm)) &&
569	__gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
570	return ggtt->invalidate(ggtt);
571
572	gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
573	}
574
575	static void gen8_ggtt_clear_range(struct i915_address_space *vm,
576	u64 start, u64 length)
577	{
578	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
579	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
580	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
581	const gen8_pte_t scratch_pte = vm->scratch[`0`]->encode;
582	gen8_pte_t __iomem *gtt_base =
583	(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
584	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
585	int i;
586
587	if (WARN(num_entries > max_entries,
588	"First entry = %d; Num entries = %d (max=%d)\n",
589	first_entry, num_entries, max_entries))
590	num_entries = max_entries;
591
592	for (i = `0`; i < num_entries; i++)
593	gen8_set_pte(addr: &gtt_base[i], pte: scratch_pte);
594	}
595
596	static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
597	u64 start, u64 length)
598	{
599	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
600	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
601	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
602	const gen8_pte_t scratch_pte = vm->scratch[`0`]->encode;
603	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
604
605	if (WARN(num_entries > max_entries,
606	"First entry = %d; Num entries = %d (max=%d)\n",
607	first_entry, num_entries, max_entries))
608	num_entries = max_entries;
609
610	if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, offset: first_entry,
611	NULL, num_entries, pte: scratch_pte))
612	return ggtt->invalidate(ggtt);
613
614	gen8_ggtt_clear_range(vm, start, length);
615	}
616
617	static void gen6_ggtt_insert_page(struct i915_address_space *vm,
618	dma_addr_t addr,
619	u64 offset,
620	unsigned int pat_index,
621	u32 flags)
622	{
623	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
624	gen6_pte_t __iomem *pte =
625	(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
626
627	iowrite32(vm->pte_encode(addr, pat_index, flags), pte);
628
629	ggtt->invalidate(ggtt);
630	}
631
632	static dma_addr_t gen6_ggtt_read_entry(struct i915_address_space *vm,
633	u64 offset,
634	bool is_present, bool is_local)
635	{
636	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
637	gen6_pte_t __iomem *pte =
638	(gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE;
639
640	return vm->pte_decode(ioread32(pte), is_present, is_local);
641	}
642
643	/*
644	* Binds an object into the global gtt with the specified cache level.
645	* The object will be accessible to the GPU via commands whose operands
646	* reference offsets within the global GTT as well as accessible by the GPU
647	* through the GMADR mapped BAR (i915->mm.gtt->gtt).
648	*/
649	static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
650	struct i915_vma_resource *vma_res,
651	unsigned int pat_index,
652	u32 flags)
653	{
654	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
655	gen6_pte_t __iomem *gte;
656	gen6_pte_t __iomem *end;
657	struct sgt_iter iter;
658	dma_addr_t addr;
659
660	gte = (gen6_pte_t __iomem *)ggtt->gsm;
661	gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
662
663	end = gte + vma_res->guard / I915_GTT_PAGE_SIZE;
664	while (gte < end)
665	iowrite32(vm->scratch[`0`]->encode, gte++);
666	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
667	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
668	iowrite32(vm->pte_encode(addr, pat_index, flags), gte++);
669	GEM_BUG_ON(gte > end);
670
671	/ Fill the allocated but "unused" space beyond the end of the buffer /
672	while (gte < end)
673	iowrite32(vm->scratch[`0`]->encode, gte++);
674
675	/*
676	* We want to flush the TLBs only after we're certain all the PTE
677	* updates have finished.
678	*/
679	ggtt->invalidate(ggtt);
680	}
681
682	static void nop_clear_range(struct i915_address_space *vm,
683	u64 start, u64 length)
684	{
685	}
686
687	static void bxt_vtd_ggtt_wa(struct i915_address_space *vm)
688	{
689	/*
690	* Make sure the internal GAM fifo has been cleared of all GTT
691	* writes before exiting stop_machine(). This guarantees that
692	* any aperture accesses waiting to start in another process
693	* cannot back up behind the GTT writes causing a hang.
694	* The register can be any arbitrary GAM register.
695	*/
696	intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6);
697	}
698
699	struct insert_page {
700	struct i915_address_space *vm;
701	dma_addr_t addr;
702	u64 offset;
703	unsigned int pat_index;
704	};
705
706	static int bxt_vtd_ggtt_insert_page__cb(void *_arg)
707	{
708	struct insert_page *arg = _arg;
709
710	gen8_ggtt_insert_page(vm: arg->vm, addr: arg->addr, offset: arg->offset,
711	pat_index: arg->pat_index, flags: `0`);
712	bxt_vtd_ggtt_wa(vm: arg->vm);
713
714	return `0`;
715	}
716
717	static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm,
718	dma_addr_t addr,
719	u64 offset,
720	unsigned int pat_index,
721	u32 unused)
722	{
723	struct insert_page arg = { vm, addr, offset, pat_index };
724
725	stop_machine(fn: bxt_vtd_ggtt_insert_page__cb, data: &arg, NULL);
726	}
727
728	struct insert_entries {
729	struct i915_address_space *vm;
730	struct i915_vma_resource *vma_res;
731	unsigned int pat_index;
732	u32 flags;
733	};
734
735	static int bxt_vtd_ggtt_insert_entries__cb(void *_arg)
736	{
737	struct insert_entries *arg = _arg;
738
739	gen8_ggtt_insert_entries(vm: arg->vm, vma_res: arg->vma_res,
740	pat_index: arg->pat_index, flags: arg->flags);
741	bxt_vtd_ggtt_wa(vm: arg->vm);
742
743	return `0`;
744	}
745
746	static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm,
747	struct i915_vma_resource *vma_res,
748	unsigned int pat_index,
749	u32 flags)
750	{
751	struct insert_entries arg = { vm, vma_res, pat_index, flags };
752
753	stop_machine(fn: bxt_vtd_ggtt_insert_entries__cb, data: &arg, NULL);
754	}
755
756	static void gen6_ggtt_clear_range(struct i915_address_space *vm,
757	u64 start, u64 length)
758	{
759	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
760	unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
761	unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
762	gen6_pte_t scratch_pte, __iomem *gtt_base =
763	(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
764	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
765	int i;
766
767	if (WARN(num_entries > max_entries,
768	"First entry = %d; Num entries = %d (max=%d)\n",
769	first_entry, num_entries, max_entries))
770	num_entries = max_entries;
771
772	scratch_pte = vm->scratch[`0`]->encode;
773	for (i = `0`; i < num_entries; i++)
774	iowrite32(scratch_pte, &gtt_base[i]);
775	}
776
777	void intel_ggtt_bind_vma(struct i915_address_space *vm,
778	struct i915_vm_pt_stash *stash,
779	struct i915_vma_resource *vma_res,
780	unsigned int pat_index,
781	u32 flags)
782	{
783	u32 pte_flags;
784
785	if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK))
786	return;
787
788	vma_res->bound_flags \|= flags;
789
790	/ Applicable to VLV (gen8+ do not support RO in the GGTT) /
791	pte_flags = `0`;
792	if (vma_res->bi.readonly)
793	pte_flags \|= PTE_READ_ONLY;
794	if (vma_res->bi.lmem)
795	pte_flags \|= PTE_LM;
796
797	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
798	vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE;
799	}
800
801	void intel_ggtt_unbind_vma(struct i915_address_space *vm,
802	struct i915_vma_resource *vma_res)
803	{
804	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
805	}
806
807	dma_addr_t intel_ggtt_read_entry(struct i915_address_space *vm,
808	u64 offset, bool is_present, bool is_local)
809	{
810	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
811
812	return ggtt->vm.read_entry(vm, offset, is_present, is_local);
813	}
814
815	/*
816	* Reserve the top of the GuC address space for firmware images. Addresses
817	* beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC,
818	* which makes for a suitable range to hold GuC/HuC firmware images if the
819	* size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT
820	* is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk
821	* of the same size anyway, which is far more than needed, to keep the logic
822	* in uc_fw_ggtt_offset() simple.
823	*/
824	#define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP)
825
826	static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt)
827	{
828	u64 offset;
829	int ret;
830
831	if (!intel_uc_uses_guc(uc: &ggtt->vm.gt->uc))
832	return `0`;
833
834	GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE);
835	offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE;
836
837	ret = i915_gem_gtt_reserve(vm: &ggtt->vm, NULL, node: &ggtt->uc_fw,
838	GUC_TOP_RESERVE_SIZE, offset,
839	I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
840	if (ret)
841	drm_dbg(&ggtt->vm.i915->drm,
842	"Failed to reserve top of GGTT for GuC\n");
843
844	return ret;
845	}
846
847	static void ggtt_release_guc_top(struct i915_ggtt *ggtt)
848	{
849	if (drm_mm_node_allocated(node: &ggtt->uc_fw))
850	drm_mm_remove_node(node: &ggtt->uc_fw);
851	}
852
853	static void cleanup_init_ggtt(struct i915_ggtt *ggtt)
854	{
855	ggtt_release_guc_top(ggtt);
856	if (drm_mm_node_allocated(node: &ggtt->error_capture))
857	drm_mm_remove_node(node: &ggtt->error_capture);
858	mutex_destroy(lock: &ggtt->error_mutex);
859	}
860
861	static int init_ggtt(struct i915_ggtt *ggtt)
862	{
863	/*
864	* Let GEM Manage all of the aperture.
865	*
866	* However, leave one page at the end still bound to the scratch page.
867	* There are a number of places where the hardware apparently prefetches
868	* past the end of the object, and we've seen multiple hangs with the
869	* GPU head pointer stuck in a batchbuffer bound at the last page of the
870	* aperture. One page should be enough to keep any prefetching inside
871	* of the aperture.
872	*/
873	unsigned long hole_start, hole_end;
874	struct drm_mm_node *entry;
875	int ret;
876
877	/*
878	* GuC requires all resources that we're sharing with it to be placed in
879	* non-WOPCM memory. If GuC is not present or not in use we still need a
880	* small bias as ring wraparound at offset 0 sometimes hangs. No idea
881	* why.
882	*/
883	ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE,
884	intel_wopcm_guc_size(&ggtt->vm.gt->wopcm));
885
886	ret = intel_vgt_balloon(ggtt);
887	if (ret)
888	return ret;
889
890	mutex_init(&ggtt->error_mutex);
891	if (ggtt->mappable_end) {
892	/*
893	* Reserve a mappable slot for our lockless error capture.
894	*
895	* We strongly prefer taking address 0x0 in order to protect
896	* other critical buffers against accidental overwrites,
897	* as writing to address 0 is a very common mistake.
898	*
899	* Since 0 may already be in use by the system (e.g. the BIOS
900	* framebuffer), we let the reservation fail quietly and hope
901	* 0 remains reserved always.
902	*
903	* If we fail to reserve 0, and then fail to find any space
904	* for an error-capture, remain silent. We can afford not
905	* to reserve an error_capture node as we have fallback
906	* paths, and we trust that 0 will remain reserved. However,
907	* the only likely reason for failure to insert is a driver
908	* bug, which we expect to cause other failures...
909	*
910	* Since CPU can perform speculative reads on error capture
911	* (write-combining allows it) add scratch page after error
912	* capture to avoid DMAR errors.
913	*/
914	ggtt->error_capture.size = `2` * I915_GTT_PAGE_SIZE;
915	ggtt->error_capture.color = I915_COLOR_UNEVICTABLE;
916	if (drm_mm_reserve_node(mm: &ggtt->vm.mm, node: &ggtt->error_capture))
917	drm_mm_insert_node_in_range(mm: &ggtt->vm.mm,
918	node: &ggtt->error_capture,
919	size: ggtt->error_capture.size, alignment: `0`,
920	color: ggtt->error_capture.color,
921	start: `0`, end: ggtt->mappable_end,
922	mode: DRM_MM_INSERT_LOW);
923	}
924	if (drm_mm_node_allocated(node: &ggtt->error_capture)) {
925	u64 start = ggtt->error_capture.start;
926	u64 size = ggtt->error_capture.size;
927
928	ggtt->vm.scratch_range(&ggtt->vm, start, size);
929	drm_dbg(&ggtt->vm.i915->drm,
930	"Reserved GGTT:[%llx, %llx] for use by error capture\n",
931	start, start + size);
932	}
933
934	/*
935	* The upper portion of the GuC address space has a sizeable hole
936	* (several MB) that is inaccessible by GuC. Reserve this range within
937	* GGTT as it can comfortably hold GuC/HuC firmware images.
938	*/
939	ret = ggtt_reserve_guc_top(ggtt);
940	if (ret)
941	goto err;
942
943	/ Clear any non-preallocated blocks /
944	drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) {
945	drm_dbg(&ggtt->vm.i915->drm,
946	"clearing unused GTT space: [%lx, %lx]\n",
947	hole_start, hole_end);
948	ggtt->vm.clear_range(&ggtt->vm, hole_start,
949	hole_end - hole_start);
950	}
951
952	/ And finally clear the reserved guard page /
953	ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE);
954
955	return `0`;
956
957	err:
958	cleanup_init_ggtt(ggtt);
959	return ret;
960	}
961
962	static void aliasing_gtt_bind_vma(struct i915_address_space *vm,
963	struct i915_vm_pt_stash *stash,
964	struct i915_vma_resource *vma_res,
965	unsigned int pat_index,
966	u32 flags)
967	{
968	u32 pte_flags;
969
970	/ Currently applicable only to VLV /
971	pte_flags = `0`;
972	if (vma_res->bi.readonly)
973	pte_flags \|= PTE_READ_ONLY;
974
975	if (flags & I915_VMA_LOCAL_BIND)
976	ppgtt_bind_vma(vm: &i915_vm_to_ggtt(vm)->alias->vm,
977	stash, vma_res, pat_index, flags);
978
979	if (flags & I915_VMA_GLOBAL_BIND)
980	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
981
982	vma_res->bound_flags \|= flags;
983	}
984
985	static void aliasing_gtt_unbind_vma(struct i915_address_space *vm,
986	struct i915_vma_resource *vma_res)
987	{
988	if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND)
989	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
990
991	if (vma_res->bound_flags & I915_VMA_LOCAL_BIND)
992	ppgtt_unbind_vma(vm: &i915_vm_to_ggtt(vm)->alias->vm, vma_res);
993	}
994
995	static int init_aliasing_ppgtt(struct i915_ggtt *ggtt)
996	{
997	struct i915_vm_pt_stash stash = {};
998	struct i915_ppgtt *ppgtt;
999	int err;
1000
1001	ppgtt = i915_ppgtt_create(gt: ggtt->vm.gt, lmem_pt_obj_flags: `0`);
1002	if (IS_ERR(ptr: ppgtt))
1003	return PTR_ERR(ptr: ppgtt);
1004
1005	if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) {
1006	err = -ENODEV;
1007	goto err_ppgtt;
1008	}
1009
1010	err = i915_vm_alloc_pt_stash(vm: &ppgtt->vm, stash: &stash, size: ggtt->vm.total);
1011	if (err)
1012	goto err_ppgtt;
1013
1014	i915_gem_object_lock(obj: ppgtt->vm.scratch[`0`], NULL);
1015	err = i915_vm_map_pt_stash(vm: &ppgtt->vm, stash: &stash);
1016	i915_gem_object_unlock(obj: ppgtt->vm.scratch[`0`]);
1017	if (err)
1018	goto err_stash;
1019
1020	/*
1021	* Note we only pre-allocate as far as the end of the global
1022	* GTT. On 48b / 4-level page-tables, the difference is very,
1023	* very significant! We have to preallocate as GVT/vgpu does
1024	* not like the page directory disappearing.
1025	*/
1026	ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, `0`, ggtt->vm.total);
1027
1028	ggtt->alias = ppgtt;
1029	ggtt->vm.bind_async_flags \|= ppgtt->vm.bind_async_flags;
1030
1031	GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma);
1032	ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma;
1033
1034	GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma);
1035	ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma;
1036
1037	i915_vm_free_pt_stash(vm: &ppgtt->vm, stash: &stash);
1038	return `0`;
1039
1040	err_stash:
1041	i915_vm_free_pt_stash(vm: &ppgtt->vm, stash: &stash);
1042	err_ppgtt:
1043	i915_vm_put(vm: &ppgtt->vm);
1044	return err;
1045	}
1046
1047	static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt)
1048	{
1049	struct i915_ppgtt *ppgtt;
1050
1051	ppgtt = fetch_and_zero(&ggtt->alias);
1052	if (!ppgtt)
1053	return;
1054
1055	i915_vm_put(vm: &ppgtt->vm);
1056
1057	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
1058	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
1059	}
1060
1061	int i915_init_ggtt(struct drm_i915_private *i915)
1062	{
1063	int ret;
1064
1065	ret = init_ggtt(ggtt: to_gt(i915)->ggtt);
1066	if (ret)
1067	return ret;
1068
1069	if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) {
1070	ret = init_aliasing_ppgtt(ggtt: to_gt(i915)->ggtt);
1071	if (ret)
1072	cleanup_init_ggtt(ggtt: to_gt(i915)->ggtt);
1073	}
1074
1075	return `0`;
1076	}
1077
1078	static void ggtt_cleanup_hw(struct i915_ggtt *ggtt)
1079	{
1080	struct i915_vma vma, vn;
1081
1082	flush_workqueue(ggtt->vm.i915->wq);
1083	i915_gem_drain_freed_objects(i915: ggtt->vm.i915);
1084
1085	mutex_lock(&ggtt->vm.mutex);
1086
1087	ggtt->vm.skip_pte_rewrite = true;
1088
1089	list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) {
1090	struct drm_i915_gem_object *obj = vma->obj;
1091	bool trylock;
1092
1093	trylock = i915_gem_object_trylock(obj, NULL);
1094	WARN_ON(!trylock);
1095
1096	WARN_ON(__i915_vma_unbind(vma));
1097	if (trylock)
1098	i915_gem_object_unlock(obj);
1099	}
1100
1101	if (drm_mm_node_allocated(node: &ggtt->error_capture))
1102	drm_mm_remove_node(node: &ggtt->error_capture);
1103	mutex_destroy(lock: &ggtt->error_mutex);
1104
1105	ggtt_release_guc_top(ggtt);
1106	intel_vgt_deballoon(ggtt);
1107
1108	ggtt->vm.cleanup(&ggtt->vm);
1109
1110	mutex_unlock(lock: &ggtt->vm.mutex);
1111	i915_address_space_fini(vm: &ggtt->vm);
1112
1113	arch_phys_wc_del(handle: ggtt->mtrr);
1114
1115	if (ggtt->iomap.size)
1116	io_mapping_fini(mapping: &ggtt->iomap);
1117	}
1118
1119	/**
1120	* i915_ggtt_driver_release - Clean up GGTT hardware initialization
1121	* @i915: i915 device
1122	*/
1123	void i915_ggtt_driver_release(struct drm_i915_private *i915)
1124	{
1125	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
1126
1127	fini_aliasing_ppgtt(ggtt);
1128
1129	intel_ggtt_fini_fences(ggtt);
1130	ggtt_cleanup_hw(ggtt);
1131	}
1132
1133	/**
1134	* i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after
1135	* all free objects have been drained.
1136	* @i915: i915 device
1137	*/
1138	void i915_ggtt_driver_late_release(struct drm_i915_private *i915)
1139	{
1140	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
1141
1142	GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != `1`);
1143	dma_resv_fini(obj: &ggtt->vm._resv);
1144	}
1145
1146	static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
1147	{
1148	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
1149	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
1150	return snb_gmch_ctl << `20`;
1151	}
1152
1153	static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
1154	{
1155	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
1156	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
1157	if (bdw_gmch_ctl)
1158	bdw_gmch_ctl = `1` << bdw_gmch_ctl;
1159
1160	#ifdef CONFIG_X86_32
1161	/ Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE /
1162	if (bdw_gmch_ctl > `4`)
1163	bdw_gmch_ctl = `4`;
1164	#endif
1165
1166	return bdw_gmch_ctl << `20`;
1167	}
1168
1169	static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
1170	{
1171	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
1172	gmch_ctrl &= SNB_GMCH_GGMS_MASK;
1173
1174	if (gmch_ctrl)
1175	return `1` << (`20` + gmch_ctrl);
1176
1177	return `0`;
1178	}
1179
1180	static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915)
1181	{
1182	/*
1183	* GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset
1184	* GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset
1185	*/
1186	GEM_BUG_ON(GRAPHICS_VER(i915) < `6`);
1187	return (GRAPHICS_VER(i915) < `8`) ? SZ_4M : SZ_16M;
1188	}
1189
/* Offset of GTTADR inside the GTTMMADR BAR: the second half of the BAR. */
static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915)
{
	const unsigned int bar_size = gen6_gttmmadr_size(i915);

	return bar_size / 2;
}
1194
1195	static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
1196	{
1197	struct drm_i915_private *i915 = ggtt->vm.i915;
1198	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
1199	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1200	phys_addr_t phys_addr;
1201	u32 pte_flags;
1202	int ret;
1203
1204	GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915));
1205
1206	if (i915_direct_stolen_access(i915)) {
1207	drm_dbg(&i915->drm, "Using direct GSM access\n");
1208	phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK;
1209	} else {
1210	phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915);
1211	}
1212
1213	if (needs_wc_ggtt_mapping(i915))
1214	ggtt->gsm = ioremap_wc(offset: phys_addr, size);
1215	else
1216	ggtt->gsm = ioremap(offset: phys_addr, size);
1217
1218	if (!ggtt->gsm) {
1219	drm_err(&i915->drm, "Failed to map the ggtt page table\n");
1220	return -ENOMEM;
1221	}
1222
1223	kref_init(kref: &ggtt->vm.resv_ref);
1224	ret = setup_scratch_page(&ggtt->vm);
1225	if (ret) {
1226	drm_err(&i915->drm, "Scratch setup failed\n");
1227	/ iounmap will also get called at remove, but meh /
1228	iounmap(addr: ggtt->gsm);
1229	return ret;
1230	}
1231
1232	pte_flags = `0`;
1233	if (i915_gem_object_is_lmem(obj: ggtt->vm.scratch[`0`]))
1234	pte_flags \|= PTE_LM;
1235
1236	ggtt->vm.scratch[`0`]->encode =
1237	ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[`0`]),
1238	i915_gem_get_pat_index(i915,
1239	level: I915_CACHE_NONE),
1240	pte_flags);
1241
1242	return `0`;
1243	}
1244
1245	static void gen6_gmch_remove(struct i915_address_space *vm)
1246	{
1247	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
1248
1249	iounmap(addr: ggtt->gsm);
1250	free_scratch(vm);
1251	}
1252
1253	static struct resource pci_resource(struct pci_dev pdev, int* bar)
1254	{
1255	return DEFINE_RES_MEM(pci_resource_start(pdev, bar),
1256	pci_resource_len(pdev, bar));
1257	}
1258
1259	static int gen8_gmch_probe(struct i915_ggtt *ggtt)
1260	{
1261	struct drm_i915_private *i915 = ggtt->vm.i915;
1262	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1263	unsigned int size;
1264	u16 snb_gmch_ctl;
1265
1266	if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) {
1267	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
1268	return -ENXIO;
1269
1270	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
1271	ggtt->mappable_end = resource_size(res: &ggtt->gmadr);
1272	}
1273
1274	pci_read_config_word(dev: pdev, SNB_GMCH_CTRL, val: &snb_gmch_ctl);
1275	if (IS_CHERRYVIEW(i915))
1276	size = chv_get_total_gtt_size(gmch_ctrl: snb_gmch_ctl);
1277	else
1278	size = gen8_get_total_gtt_size(bdw_gmch_ctl: snb_gmch_ctl);
1279
1280	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1281	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1282	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
1283
1284	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
1285	ggtt->vm.cleanup = gen6_gmch_remove;
1286	ggtt->vm.insert_page = gen8_ggtt_insert_page;
1287	ggtt->vm.clear_range = nop_clear_range;
1288	ggtt->vm.scratch_range = gen8_ggtt_clear_range;
1289
1290	ggtt->vm.insert_entries = gen8_ggtt_insert_entries;
1291	ggtt->vm.read_entry = gen8_ggtt_read_entry;
1292
1293	/*
1294	* Serialize GTT updates with aperture access on BXT if VT-d is on,
1295	* and always on CHV.
1296	*/
1297	if (intel_vm_no_concurrent_access_wa(i915)) {
1298	ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL;
1299	ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL;
1300
1301	/*
1302	* Calling stop_machine() version of GGTT update function
1303	* at error capture/reset path will raise lockdep warning.
1304	* Allow calling gen8_ggtt_insert_* directly at reset path
1305	* which is safe from parallel GGTT updates.
1306	*/
1307	ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
1308	ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries;
1309
1310	ggtt->vm.bind_async_flags =
1311	I915_VMA_GLOBAL_BIND \| I915_VMA_LOCAL_BIND;
1312	}
1313
1314	if (i915_ggtt_require_binder(i915)) {
1315	ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
1316	ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
1317	ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
1318	/*
1319	* On GPU is hung, we might bind VMAs for error capture.
1320	* Fallback to CPU GGTT updates in that case.
1321	*/
1322	ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
1323	}
1324
1325	if (intel_uc_wants_guc_submission(uc: &ggtt->vm.gt->uc))
1326	ggtt->invalidate = guc_ggtt_invalidate;
1327	else
1328	ggtt->invalidate = gen8_ggtt_invalidate;
1329
1330	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
1331	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
1332
1333	if (GRAPHICS_VER_FULL(i915) >= IP_VER(`12`, `70`))
1334	ggtt->vm.pte_encode = mtl_ggtt_pte_encode;
1335	else
1336	ggtt->vm.pte_encode = gen8_ggtt_pte_encode;
1337
1338	ggtt->vm.pte_decode = gen8_ggtt_pte_decode;
1339
1340	return ggtt_probe_common(ggtt, size);
1341	}
1342
1343	/*
1344	* For pre-gen8 platforms pat_index is the same as enum i915_cache_level,
1345	* so the switch-case statements in these PTE encode functions are still valid.
1346	* See translation table LEGACY_CACHELEVEL.
1347	*/
1348	static u64 snb_pte_encode(dma_addr_t addr,
1349	unsigned int pat_index,
1350	u32 flags)
1351	{
1352	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) \| GEN6_PTE_VALID;
1353
1354	switch (pat_index) {
1355	case I915_CACHE_L3_LLC:
1356	case I915_CACHE_LLC:
1357	pte \|= GEN6_PTE_CACHE_LLC;
1358	break;
1359	case I915_CACHE_NONE:
1360	pte \|= GEN6_PTE_UNCACHED;
1361	break;
1362	default:
1363	MISSING_CASE(pat_index);
1364	}
1365
1366	return pte;
1367	}
1368
1369	static u64 ivb_pte_encode(dma_addr_t addr,
1370	unsigned int pat_index,
1371	u32 flags)
1372	{
1373	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) \| GEN6_PTE_VALID;
1374
1375	switch (pat_index) {
1376	case I915_CACHE_L3_LLC:
1377	pte \|= GEN7_PTE_CACHE_L3_LLC;
1378	break;
1379	case I915_CACHE_LLC:
1380	pte \|= GEN6_PTE_CACHE_LLC;
1381	break;
1382	case I915_CACHE_NONE:
1383	pte \|= GEN6_PTE_UNCACHED;
1384	break;
1385	default:
1386	MISSING_CASE(pat_index);
1387	}
1388
1389	return pte;
1390	}
1391
1392	static u64 byt_pte_encode(dma_addr_t addr,
1393	unsigned int pat_index,
1394	u32 flags)
1395	{
1396	gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) \| GEN6_PTE_VALID;
1397
1398	if (!(flags & PTE_READ_ONLY))
1399	pte \|= BYT_PTE_WRITEABLE;
1400
1401	if (pat_index != I915_CACHE_NONE)
1402	pte \|= BYT_PTE_SNOOPED_BY_CPU_CACHES;
1403
1404	return pte;
1405	}
1406
1407	static u64 hsw_pte_encode(dma_addr_t addr,
1408	unsigned int pat_index,
1409	u32 flags)
1410	{
1411	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) \| GEN6_PTE_VALID;
1412
1413	if (pat_index != I915_CACHE_NONE)
1414	pte \|= HSW_WB_LLC_AGE3;
1415
1416	return pte;
1417	}
1418
1419	static u64 iris_pte_encode(dma_addr_t addr,
1420	unsigned int pat_index,
1421	u32 flags)
1422	{
1423	gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) \| GEN6_PTE_VALID;
1424
1425	switch (pat_index) {
1426	case I915_CACHE_NONE:
1427	break;
1428	case I915_CACHE_WT:
1429	pte \|= HSW_WT_ELLC_LLC_AGE3;
1430	break;
1431	default:
1432	pte \|= HSW_WB_ELLC_LLC_AGE3;
1433	break;
1434	}
1435
1436	return pte;
1437	}
1438
1439	static dma_addr_t gen6_pte_decode(u64 pte, bool is_present, bool is_local)
1440	{
1441	*is_present = pte & GEN6_PTE_VALID;
1442	*is_local = false;
1443
1444	return ((pte & `0xff0`) << `28`) \| (pte & ~`0xfff`);
1445	}
1446
1447	static int gen6_gmch_probe(struct i915_ggtt *ggtt)
1448	{
1449	struct drm_i915_private *i915 = ggtt->vm.i915;
1450	struct pci_dev *pdev = to_pci_dev(i915->drm.dev);
1451	unsigned int size;
1452	u16 snb_gmch_ctl;
1453
1454	if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR))
1455	return -ENXIO;
1456
1457	ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR);
1458	ggtt->mappable_end = resource_size(res: &ggtt->gmadr);
1459
1460	/*
1461	* 64/512MB is the current min/max we actually know of, but this is
1462	* just a coarse sanity check.
1463	*/
1464	if (ggtt->mappable_end < (`64` << `20`) \|\|
1465	ggtt->mappable_end > (`512` << `20`)) {
1466	drm_err(&i915->drm, "Unknown GMADR size (%pa)\n",
1467	&ggtt->mappable_end);
1468	return -ENXIO;
1469	}
1470
1471	pci_read_config_word(dev: pdev, SNB_GMCH_CTRL, val: &snb_gmch_ctl);
1472
1473	size = gen6_get_total_gtt_size(snb_gmch_ctl);
1474	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
1475
1476	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
1477	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
1478
1479	ggtt->vm.clear_range = nop_clear_range;
1480	if (!HAS_FULL_PPGTT(i915))
1481	ggtt->vm.clear_range = gen6_ggtt_clear_range;
1482	ggtt->vm.scratch_range = gen6_ggtt_clear_range;
1483	ggtt->vm.insert_page = gen6_ggtt_insert_page;
1484	ggtt->vm.insert_entries = gen6_ggtt_insert_entries;
1485	ggtt->vm.read_entry = gen6_ggtt_read_entry;
1486	ggtt->vm.cleanup = gen6_gmch_remove;
1487
1488	ggtt->invalidate = gen6_ggtt_invalidate;
1489
1490	if (HAS_EDRAM(i915))
1491	ggtt->vm.pte_encode = iris_pte_encode;
1492	else if (IS_HASWELL(i915))
1493	ggtt->vm.pte_encode = hsw_pte_encode;
1494	else if (IS_VALLEYVIEW(i915))
1495	ggtt->vm.pte_encode = byt_pte_encode;
1496	else if (GRAPHICS_VER(i915) >= `7`)
1497	ggtt->vm.pte_encode = ivb_pte_encode;
1498	else
1499	ggtt->vm.pte_encode = snb_pte_encode;
1500
1501	ggtt->vm.pte_decode = gen6_pte_decode;
1502
1503	ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma;
1504	ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma;
1505
1506	return ggtt_probe_common(ggtt, size);
1507	}
1508
1509	static int ggtt_probe_hw(struct i915_ggtt ggtt, struct* intel_gt *gt)
1510	{
1511	struct drm_i915_private *i915 = gt->i915;
1512	int ret;
1513
1514	ggtt->vm.gt = gt;
1515	ggtt->vm.i915 = i915;
1516	ggtt->vm.dma = i915->drm.dev;
1517	dma_resv_init(obj: &ggtt->vm._resv);
1518
1519	if (GRAPHICS_VER(i915) >= `8`)
1520	ret = gen8_gmch_probe(ggtt);
1521	else if (GRAPHICS_VER(i915) >= `6`)
1522	ret = gen6_gmch_probe(ggtt);
1523	else
1524	ret = intel_ggtt_gmch_probe(ggtt);
1525
1526	if (ret) {
1527	dma_resv_fini(obj: &ggtt->vm._resv);
1528	return ret;
1529	}
1530
1531	if ((ggtt->vm.total - `1`) >> `32`) {
1532	drm_err(&i915->drm,
1533	"We never expected a Global GTT with more than 32bits"
1534	" of address space! Found %lldM!\n",
1535	ggtt->vm.total >> `20`);
1536	ggtt->vm.total = `1ULL` << `32`;
1537	ggtt->mappable_end =
1538	min_t(u64, ggtt->mappable_end, ggtt->vm.total);
1539	}
1540
1541	if (ggtt->mappable_end > ggtt->vm.total) {
1542	drm_err(&i915->drm,
1543	"mappable aperture extends past end of GGTT,"
1544	" aperture=%pa, total=%llx\n",
1545	&ggtt->mappable_end, ggtt->vm.total);
1546	ggtt->mappable_end = ggtt->vm.total;
1547	}
1548
1549	/ GMADR is the PCI mmio aperture into the global GTT. /
1550	drm_dbg(&i915->drm, "GGTT size = %lluM\n", ggtt->vm.total >> `20`);
1551	drm_dbg(&i915->drm, "GMADR size = %lluM\n",
1552	(u64)ggtt->mappable_end >> `20`);
1553	drm_dbg(&i915->drm, "DSM size = %lluM\n",
1554	(u64)resource_size(&intel_graphics_stolen_res) >> `20`);
1555
1556	return `0`;
1557	}
1558
1559	/**
1560	* i915_ggtt_probe_hw - Probe GGTT hardware location
1561	* @i915: i915 device
1562	*/
1563	int i915_ggtt_probe_hw(struct drm_i915_private *i915)
1564	{
1565	struct intel_gt *gt;
1566	int ret, i;
1567
1568	for_each_gt(gt, i915, i) {
1569	ret = intel_gt_assign_ggtt(gt);
1570	if (ret)
1571	return ret;
1572	}
1573
1574	ret = ggtt_probe_hw(ggtt: to_gt(i915)->ggtt, gt: to_gt(i915));
1575	if (ret)
1576	return ret;
1577
1578	if (i915_vtd_active(i915))
1579	drm_info(&i915->drm, "VT-d active for gfx access\n");
1580
1581	return `0`;
1582	}
1583
1584	struct i915_ggtt i915_ggtt_create(struct* drm_i915_private *i915)
1585	{
1586	struct i915_ggtt *ggtt;
1587
1588	ggtt = drmm_kzalloc(dev: &i915->drm, size: sizeof(*ggtt), GFP_KERNEL);
1589	if (!ggtt)
1590	return ERR_PTR(error: -ENOMEM);
1591
1592	INIT_LIST_HEAD(list: &ggtt->gt_list);
1593
1594	return ggtt;
1595	}
1596
/*
 * Enable the GGTT hardware. Only devices older than gen6 need any work,
 * which is delegated to the GMCH helper; everything newer returns 0.
 */
int i915_ggtt_enable_hw(struct drm_i915_private *i915)
{
	if (GRAPHICS_VER(i915) >= 6)
		return 0;

	return intel_ggtt_gmch_enable_hw(i915);
}
1604
1605	/**
1606	* i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM
1607	* @vm: The VM to restore the mappings for
1608	* @all_evicted: Were all VMAs expected to be evicted on suspend?
1609	*
1610	* Restore the memory mappings for all objects mapped to HW via the GGTT or a
1611	* DPT page table.
1612	*
1613	* Returns %true if restoring the mapping for any object that was in a write
1614	* domain before suspend.
1615	*/
1616	bool i915_ggtt_resume_vm(struct i915_address_space *vm, bool all_evicted)
1617	{
1618	struct i915_vma *vma;
1619	bool write_domain_objs = false;
1620
1621	drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt);
1622
1623	if (all_evicted) {
1624	drm_WARN_ON(&vm->i915->drm, !list_empty(&vm->bound_list));
1625	return false;
1626	}
1627
1628	/ First fill our portion of the GTT with scratch pages /
1629	vm->clear_range(vm, `0`, vm->total);
1630
1631	/ clflush objects bound into the GGTT and rebind them. /
1632	list_for_each_entry(vma, &vm->bound_list, vm_link) {
1633	struct drm_i915_gem_object *obj = vma->obj;
1634	unsigned int was_bound =
1635	atomic_read(v: &vma->flags) & I915_VMA_BIND_MASK;
1636
1637	GEM_BUG_ON(!was_bound);
1638
1639	/*
1640	* Clear the bound flags of the vma resource to allow
1641	* ptes to be repopulated.
1642	*/
1643	vma->resource->bound_flags = `0`;
1644	vma->ops->bind_vma(vm, NULL, vma->resource,
1645	obj ? obj->pat_index :
1646	i915_gem_get_pat_index(i915: vm->i915,
1647	level: I915_CACHE_NONE),
1648	was_bound);
1649
1650	if (obj) { / only used during resume => exclusive access /
1651	write_domain_objs \|= fetch_and_zero(&obj->write_domain);
1652	obj->read_domains \|= I915_GEM_DOMAIN_GTT;
1653	}
1654	}
1655
1656	return write_domain_objs;
1657	}
1658
1659	void i915_ggtt_resume(struct i915_ggtt *ggtt)
1660	{
1661	struct intel_gt *gt;
1662	bool flush;
1663
1664	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1665	intel_gt_check_and_clear_faults(gt);
1666
1667	flush = i915_ggtt_resume_vm(vm: &ggtt->vm, all_evicted: false);
1668
1669	if (drm_mm_node_allocated(node: &ggtt->error_capture))
1670	ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
1671	ggtt->error_capture.size);
1672
1673	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
1674	intel_uc_resume_mappings(uc: &gt->uc);
1675
1676	ggtt->invalidate(ggtt);
1677
1678	if (flush)
1679	wbinvd_on_all_cpus();
1680
1681	intel_ggtt_restore_fences(ggtt);
1682	}
1683

source code of linux/drivers/gpu/drm/i915/gt/intel_ggtt.c