panthor_heap.c source code [linux/drivers/gpu/drm/panthor/panthor_heap.c]

1	// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */
3
4	#include <linux/iosys-map.h>
5	#include <linux/rwsem.h>
6
7	#include <drm/drm_print.h>
8	#include <drm/panthor_drm.h>
9
10	#include "panthor_device.h"
11	#include "panthor_gem.h"
12	#include "panthor_heap.h"
13	#include "panthor_mmu.h"
14	#include "panthor_regs.h"
15
16	/*
17	* The GPU heap context is an opaque structure used by the GPU to track the
18	* heap allocations. The driver should only touch it to initialize it (zero all
19	* fields). Because the CPU and GPU can both access this structure it is
20	* required to be GPU cache line aligned.
21	*/
22	#define HEAP_CONTEXT_SIZE 32
23
24	/**
25	* struct panthor_heap_chunk_header - Heap chunk header
26	*/
27	struct panthor_heap_chunk_header {
28	/**
29	* @next: Next heap chunk in the list.
30	*
31	* This is a GPU VA.
32	*/
33	u64 next;
34
35	/* @unknown: MBZ. /
36	u32 unknown[`14`];
37	};
38
39	/**
40	* struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
41	*/
42	struct panthor_heap_chunk {
43	/* @node: Used to insert the heap chunk in panthor_heap::chunks. /
44	struct list_head node;
45
46	/* @bo: Buffer object backing the heap chunk. /
47	struct panthor_kernel_bo *bo;
48	};
49
50	/**
51	* struct panthor_heap - Structure used to manage tiler heap contexts.
52	*/
53	struct panthor_heap {
54	/* @chunks: List containing all heap chunks allocated so far. /
55	struct list_head chunks;
56
57	/* @lock: Lock protecting insertion in the chunks list. /
58	struct mutex lock;
59
60	/* @chunk_size: Size of each chunk. /
61	u32 chunk_size;
62
63	/* @max_chunks: Maximum number of chunks. /
64	u32 max_chunks;
65
66	/**
67	* @target_in_flight: Number of in-flight render passes after which
68	* we'd let the FW wait for fragment job to finish instead of allocating new chunks.
69	*/
70	u32 target_in_flight;
71
72	/* @chunk_count: Number of heap chunks currently allocated. /
73	u32 chunk_count;
74	};
75
76	#define MAX_HEAPS_PER_POOL 128
77
78	/**
79	* struct panthor_heap_pool - Pool of heap contexts
80	*
81	* The pool is attached to a panthor_file and can't be shared across processes.
82	*/
83	struct panthor_heap_pool {
84	/* @refcount: Reference count. /
85	struct kref refcount;
86
87	/* @ptdev: Device. /
88	struct panthor_device *ptdev;
89
90	/* @vm: VM this pool is bound to. /
91	struct panthor_vm *vm;
92
93	/* @lock: Lock protecting access to @xa. /
94	struct rw_semaphore lock;
95
96	/* @xa: Array storing panthor_heap objects. /
97	struct xarray xa;
98
99	/* @gpu_contexts: Buffer object containing the GPU heap contexts. /
100	struct panthor_kernel_bo *gpu_contexts;
101
102	/* @size: Size of all chunks across all heaps in the pool. /
103	atomic_t size;
104	};
105
106	static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
107	{
108	u32 l2_features = ptdev->gpu_info.l2_features;
109	u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);
110
111	return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
112	}
113
114	static int panthor_get_heap_ctx_offset(struct panthor_heap_pool pool, int* id)
115	{
116	return panthor_heap_ctx_stride(ptdev: pool->ptdev) * id;
117	}
118
119	static void panthor_get_heap_ctx(struct* panthor_heap_pool pool, int* id)
120	{
121	return pool->gpu_contexts->kmap +
122	panthor_get_heap_ctx_offset(pool, id);
123	}
124
125	static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
126	struct panthor_heap *heap,
127	struct panthor_heap_chunk *chunk)
128	{
129	mutex_lock(&heap->lock);
130	list_del(entry: &chunk->node);
131	heap->chunk_count--;
132	mutex_unlock(lock: &heap->lock);
133
134	atomic_sub(i: heap->chunk_size, v: &pool->size);
135
136	panthor_kernel_bo_destroy(bo: chunk->bo);
137	kfree(objp: chunk);
138	}
139
140	static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
141	struct panthor_heap *heap,
142	bool initial_chunk)
143	{
144	struct panthor_heap_chunk *chunk;
145	struct panthor_heap_chunk_header *hdr;
146	int ret;
147
148	chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
149	if (!chunk)
150	return -ENOMEM;
151
152	chunk->bo = panthor_kernel_bo_create(ptdev: pool->ptdev, vm: pool->vm, size: heap->chunk_size,
153	bo_flags: DRM_PANTHOR_BO_NO_MMAP,
154	vm_map_flags: DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
155	PANTHOR_VM_KERNEL_AUTO_VA,
156	name: "Tiler heap chunk");
157	if (IS_ERR(ptr: chunk->bo)) {
158	ret = PTR_ERR(ptr: chunk->bo);
159	goto err_free_chunk;
160	}
161
162	ret = panthor_kernel_bo_vmap(bo: chunk->bo);
163	if (ret)
164	goto err_destroy_bo;
165
166	hdr = chunk->bo->kmap;
167	memset(hdr, `0`, sizeof(*hdr));
168
169	if (initial_chunk && !list_empty(head: &heap->chunks)) {
170	struct panthor_heap_chunk *prev_chunk;
171	u64 prev_gpuva;
172
173	prev_chunk = list_first_entry(&heap->chunks,
174	struct panthor_heap_chunk,
175	node);
176
177	prev_gpuva = panthor_kernel_bo_gpuva(bo: prev_chunk->bo);
178	hdr->next = (prev_gpuva & GENMASK_ULL(`63`, `12`)) \|
179	(heap->chunk_size >> `12`);
180	}
181
182	panthor_kernel_bo_vunmap(bo: chunk->bo);
183
184	mutex_lock(&heap->lock);
185	list_add(new: &chunk->node, head: &heap->chunks);
186	heap->chunk_count++;
187	mutex_unlock(lock: &heap->lock);
188
189	atomic_add(i: heap->chunk_size, v: &pool->size);
190
191	return `0`;
192
193	err_destroy_bo:
194	panthor_kernel_bo_destroy(bo: chunk->bo);
195
196	err_free_chunk:
197	kfree(objp: chunk);
198
199	return ret;
200	}
201
202	static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
203	struct panthor_heap *heap)
204	{
205	struct panthor_heap_chunk chunk, tmp;
206
207	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
208	panthor_free_heap_chunk(pool, heap, chunk);
209	}
210
211	static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
212	struct panthor_heap *heap,
213	u32 chunk_count)
214	{
215	int ret;
216	u32 i;
217
218	for (i = `0`; i < chunk_count; i++) {
219	ret = panthor_alloc_heap_chunk(pool, heap, initial_chunk: true);
220	if (ret)
221	return ret;
222	}
223
224	return `0`;
225	}
226
227	static int
228	panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
229	{
230	struct panthor_heap *heap;
231
232	heap = xa_erase(&pool->xa, index: handle);
233	if (!heap)
234	return -EINVAL;
235
236	panthor_free_heap_chunks(pool, heap);
237	mutex_destroy(lock: &heap->lock);
238	kfree(objp: heap);
239	return `0`;
240	}
241
242	/**
243	* panthor_heap_destroy() - Destroy a heap context
244	* @pool: Pool this context belongs to.
245	* @handle: Handle returned by panthor_heap_create().
246	*/
247	int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
248	{
249	int ret;
250
251	down_write(sem: &pool->lock);
252	ret = panthor_heap_destroy_locked(pool, handle);
253	up_write(sem: &pool->lock);
254
255	return ret;
256	}
257
258	/**
259	* panthor_heap_create() - Create a heap context
260	* @pool: Pool to instantiate the heap context from.
261	* @initial_chunk_count: Number of chunk allocated at initialization time.
262	* Must be at least 1.
263	* @chunk_size: The size of each chunk. Must be page-aligned and lie in the
264	* [128k:8M] range.
265	* @max_chunks: Maximum number of chunks that can be allocated.
266	* @target_in_flight: Maximum number of in-flight render passes.
267	* @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
268	* context.
269	* @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
270	* assigned to the heap context.
271	*
272	* Return: a positive handle on success, a negative error otherwise.
273	*/
274	int panthor_heap_create(struct panthor_heap_pool *pool,
275	u32 initial_chunk_count,
276	u32 chunk_size,
277	u32 max_chunks,
278	u32 target_in_flight,
279	u64 *heap_ctx_gpu_va,
280	u64 *first_chunk_gpu_va)
281	{
282	struct panthor_heap *heap;
283	struct panthor_heap_chunk *first_chunk;
284	struct panthor_vm *vm;
285	int ret = `0`;
286	u32 id;
287
288	if (initial_chunk_count == `0`)
289	return -EINVAL;
290
291	if (initial_chunk_count > max_chunks)
292	return -EINVAL;
293
294	if (!IS_ALIGNED(chunk_size, PAGE_SIZE) \|\|
295	chunk_size < SZ_128K \|\| chunk_size > SZ_8M)
296	return -EINVAL;
297
298	down_read(sem: &pool->lock);
299	vm = panthor_vm_get(vm: pool->vm);
300	up_read(sem: &pool->lock);
301
302	/ The pool has been destroyed, we can't create a new heap. /
303	if (!vm)
304	return -EINVAL;
305
306	heap = kzalloc(sizeof(*heap), GFP_KERNEL);
307	if (!heap) {
308	ret = -ENOMEM;
309	goto err_put_vm;
310	}
311
312	mutex_init(&heap->lock);
313	INIT_LIST_HEAD(list: &heap->chunks);
314	heap->chunk_size = chunk_size;
315	heap->max_chunks = max_chunks;
316	heap->target_in_flight = target_in_flight;
317
318	ret = panthor_alloc_heap_chunks(pool, heap, chunk_count: initial_chunk_count);
319	if (ret)
320	goto err_free_heap;
321
322	first_chunk = list_first_entry(&heap->chunks,
323	struct panthor_heap_chunk,
324	node);
325	*first_chunk_gpu_va = panthor_kernel_bo_gpuva(bo: first_chunk->bo);
326
327	down_write(sem: &pool->lock);
328	/ The pool has been destroyed, we can't create a new heap. /
329	if (!pool->vm) {
330	ret = -EINVAL;
331	} else {
332	ret = xa_alloc(xa: &pool->xa, id: &id, entry: heap,
333	XA_LIMIT(`0`, MAX_HEAPS_PER_POOL - `1`), GFP_KERNEL);
334	if (!ret) {
335	void *gpu_ctx = panthor_get_heap_ctx(pool, id);
336
337	memset(gpu_ctx, `0`, panthor_heap_ctx_stride(pool->ptdev));
338	*heap_ctx_gpu_va = panthor_kernel_bo_gpuva(bo: pool->gpu_contexts) +
339	panthor_get_heap_ctx_offset(pool, id);
340	}
341	}
342	up_write(sem: &pool->lock);
343
344	if (ret)
345	goto err_free_heap;
346
347	panthor_vm_put(vm);
348	return id;
349
350	err_free_heap:
351	panthor_free_heap_chunks(pool, heap);
352	mutex_destroy(lock: &heap->lock);
353	kfree(objp: heap);
354
355	err_put_vm:
356	panthor_vm_put(vm);
357	return ret;
358	}
359
360	/**
361	* panthor_heap_return_chunk() - Return an unused heap chunk
362	* @pool: The pool this heap belongs to.
363	* @heap_gpu_va: The GPU address of the heap context.
364	* @chunk_gpu_va: The chunk VA to return.
365	*
366	* This function is used when a chunk allocated with panthor_heap_grow()
367	* couldn't be linked to the heap context through the FW interface because
368	* the group requesting the allocation was scheduled out in the meantime.
369	*/
370	int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
371	u64 heap_gpu_va,
372	u64 chunk_gpu_va)
373	{
374	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(bo: pool->gpu_contexts);
375	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(ptdev: pool->ptdev);
376	struct panthor_heap_chunk chunk, tmp, *removed = NULL;
377	struct panthor_heap *heap;
378	int ret;
379
380	if (offset > U32_MAX \|\| heap_id >= MAX_HEAPS_PER_POOL)
381	return -EINVAL;
382
383	down_read(sem: &pool->lock);
384	heap = xa_load(&pool->xa, index: heap_id);
385	if (!heap) {
386	ret = -EINVAL;
387	goto out_unlock;
388	}
389
390	chunk_gpu_va &= GENMASK_ULL(`63`, `12`);
391
392	mutex_lock(&heap->lock);
393	list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
394	if (panthor_kernel_bo_gpuva(bo: chunk->bo) == chunk_gpu_va) {
395	removed = chunk;
396	list_del(entry: &chunk->node);
397	heap->chunk_count--;
398	atomic_sub(i: heap->chunk_size, v: &pool->size);
399	break;
400	}
401	}
402	mutex_unlock(lock: &heap->lock);
403
404	if (removed) {
405	panthor_kernel_bo_destroy(bo: chunk->bo);
406	kfree(objp: chunk);
407	ret = `0`;
408	} else {
409	ret = -EINVAL;
410	}
411
412	out_unlock:
413	up_read(sem: &pool->lock);
414	return ret;
415	}
416
417	/**
418	* panthor_heap_grow() - Make a heap context grow.
419	* @pool: The pool this heap belongs to.
420	* @heap_gpu_va: The GPU address of the heap context.
421	* @renderpasses_in_flight: Number of render passes currently in-flight.
422	* @pending_frag_count: Number of fragment jobs waiting for execution/completion.
423	* @new_chunk_gpu_va: Pointer used to return the chunk VA.
424	*
425	* Return:
426	* - 0 if a new heap was allocated
427	* - -ENOMEM if the tiler context reached the maximum number of chunks
428	* or if too many render passes are in-flight
429	* or if the allocation failed
430	* - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
431	*/
432	int panthor_heap_grow(struct panthor_heap_pool *pool,
433	u64 heap_gpu_va,
434	u32 renderpasses_in_flight,
435	u32 pending_frag_count,
436	u64 *new_chunk_gpu_va)
437	{
438	u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(bo: pool->gpu_contexts);
439	u32 heap_id = (u32)offset / panthor_heap_ctx_stride(ptdev: pool->ptdev);
440	struct panthor_heap_chunk *chunk;
441	struct panthor_heap *heap;
442	int ret;
443
444	if (offset > U32_MAX \|\| heap_id >= MAX_HEAPS_PER_POOL)
445	return -EINVAL;
446
447	down_read(sem: &pool->lock);
448	heap = xa_load(&pool->xa, index: heap_id);
449	if (!heap) {
450	ret = -EINVAL;
451	goto out_unlock;
452	}
453
454	/ If we reached the target in-flight render passes, or if we*
455	* reached the maximum number of chunks, let the FW figure another way to
456	* find some memory (wait for render passes to finish, or call the exception
457	* handler provided by the userspace driver, if any).
458	*/
459	if (renderpasses_in_flight > heap->target_in_flight \|\|
460	heap->chunk_count >= heap->max_chunks) {
461	ret = -ENOMEM;
462	goto out_unlock;
463	}
464
465	/ FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,*
466	* which goes through the blocking allocation path. Ultimately, we
467	* want a non-blocking allocation, so we can immediately report to the
468	* FW when the system is running out of memory. In that case, the FW
469	* can call a user-provided exception handler, which might try to free
470	* some tiler memory by issuing an intermediate fragment job. If the
471	* exception handler can't do anything, it will flag the queue as
472	* faulty so the job that triggered this tiler chunk allocation and all
473	* further jobs in this queue fail immediately instead of having to
474	* wait for the job timeout.
475	*/
476	ret = panthor_alloc_heap_chunk(pool, heap, initial_chunk: false);
477	if (ret)
478	goto out_unlock;
479
480	chunk = list_first_entry(&heap->chunks,
481	struct panthor_heap_chunk,
482	node);
483	*new_chunk_gpu_va = (panthor_kernel_bo_gpuva(bo: chunk->bo) & GENMASK_ULL(`63`, `12`)) \|
484	(heap->chunk_size >> `12`);
485	ret = `0`;
486
487	out_unlock:
488	up_read(sem: &pool->lock);
489	return ret;
490	}
491
492	static void panthor_heap_pool_release(struct kref *refcount)
493	{
494	struct panthor_heap_pool *pool =
495	container_of(refcount, struct panthor_heap_pool, refcount);
496
497	xa_destroy(&pool->xa);
498	kfree(objp: pool);
499	}
500
501	/**
502	* panthor_heap_pool_put() - Release a heap pool reference
503	* @pool: Pool to release the reference on. Can be NULL.
504	*/
505	void panthor_heap_pool_put(struct panthor_heap_pool *pool)
506	{
507	if (pool)
508	kref_put(kref: &pool->refcount, release: panthor_heap_pool_release);
509	}
510
511	/**
512	* panthor_heap_pool_get() - Get a heap pool reference
513	* @pool: Pool to get the reference on. Can be NULL.
514	*
515	* Return: @pool.
516	*/
517	struct panthor_heap_pool *
518	panthor_heap_pool_get(struct panthor_heap_pool *pool)
519	{
520	if (pool)
521	kref_get(kref: &pool->refcount);
522
523	return pool;
524	}
525
526	/**
527	* panthor_heap_pool_create() - Create a heap pool
528	* @ptdev: Device.
529	* @vm: The VM this heap pool will be attached to.
530	*
531	* Heap pools might contain up to 128 heap contexts, and are per-VM.
532	*
533	* Return: A valid pointer on success, a negative error code otherwise.
534	*/
535	struct panthor_heap_pool *
536	panthor_heap_pool_create(struct panthor_device ptdev, struct* panthor_vm *vm)
537	{
538	size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
539	panthor_heap_ctx_stride(ptdev),
540	`4096`);
541	struct panthor_heap_pool *pool;
542	int ret = `0`;
543
544	pool = kzalloc(sizeof(*pool), GFP_KERNEL);
545	if (!pool)
546	return ERR_PTR(error: -ENOMEM);
547
548	/ We want a weak ref here: the heap pool belongs to the VM, so we're*
549	* sure that, as long as the heap pool exists, the VM exists too.
550	*/
551	pool->vm = vm;
552	pool->ptdev = ptdev;
553	init_rwsem(&pool->lock);
554	xa_init_flags(xa: &pool->xa, XA_FLAGS_ALLOC);
555	kref_init(kref: &pool->refcount);
556
557	pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, size: bosize,
558	bo_flags: DRM_PANTHOR_BO_NO_MMAP,
559	vm_map_flags: DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
560	PANTHOR_VM_KERNEL_AUTO_VA,
561	name: "Heap pool");
562	if (IS_ERR(ptr: pool->gpu_contexts)) {
563	ret = PTR_ERR(ptr: pool->gpu_contexts);
564	goto err_destroy_pool;
565	}
566
567	ret = panthor_kernel_bo_vmap(bo: pool->gpu_contexts);
568	if (ret)
569	goto err_destroy_pool;
570
571	atomic_add(i: pool->gpu_contexts->obj->size, v: &pool->size);
572
573	return pool;
574
575	err_destroy_pool:
576	panthor_heap_pool_destroy(pool);
577	return ERR_PTR(error: ret);
578	}
579
580	/**
581	* panthor_heap_pool_destroy() - Destroy a heap pool.
582	* @pool: Pool to destroy.
583	*
584	* This function destroys all heap contexts and their resources. Thus
585	* preventing any use of the heap context or the chunk attached to them
586	* after that point.
587	*
588	* If the GPU still has access to some heap contexts, a fault should be
589	* triggered, which should flag the command stream groups using these
590	* context as faulty.
591	*
592	* The heap pool object is only released when all references to this pool
593	* are released.
594	*/
595	void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
596	{
597	struct panthor_heap *heap;
598	unsigned long i;
599
600	if (!pool)
601	return;
602
603	down_write(sem: &pool->lock);
604	xa_for_each(&pool->xa, i, heap)
605	drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
606
607	if (!IS_ERR_OR_NULL(ptr: pool->gpu_contexts)) {
608	atomic_sub(i: pool->gpu_contexts->obj->size, v: &pool->size);
609	panthor_kernel_bo_destroy(bo: pool->gpu_contexts);
610	}
611
612	/ Reflects the fact the pool has been destroyed. /
613	pool->vm = NULL;
614	up_write(sem: &pool->lock);
615
616	panthor_heap_pool_put(pool);
617	}
618
619	/**
620	* panthor_heap_pool_size() - Get a heap pool's total size
621	* @pool: Pool whose total chunks size to return
622	*
623	* Returns the aggregated size of all chunks for all heaps in the pool
624	*
625	*/
626	size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
627	{
628	if (!pool)
629	return `0`;
630
631	return atomic_read(v: &pool->size);
632	}
633

source code of linux/drivers/gpu/drm/panthor/panthor_heap.c