// SPDX-License-Identifier: GPL-2.0 or MIT
/* Copyright 2023 Collabora ltd. */
3
4#include <linux/iosys-map.h>
5#include <linux/rwsem.h>
6
7#include <drm/drm_print.h>
8#include <drm/panthor_drm.h>
9
10#include "panthor_device.h"
11#include "panthor_gem.h"
12#include "panthor_heap.h"
13#include "panthor_mmu.h"
14#include "panthor_regs.h"
15
/*
 * The GPU heap context is an opaque structure used by the GPU to track the
 * heap allocations. The driver should only touch it to initialize it (zero all
 * fields). Because the CPU and GPU can both access this structure it is
 * required to be GPU cache line aligned.
 *
 * The size is expressed in bytes. The distance between two consecutive
 * contexts in the pool is this size rounded up to the GPU cache line size.
 */
#define HEAP_CONTEXT_SIZE	32
23
24/**
25 * struct panthor_heap_chunk_header - Heap chunk header
26 */
27struct panthor_heap_chunk_header {
28 /**
29 * @next: Next heap chunk in the list.
30 *
31 * This is a GPU VA.
32 */
33 u64 next;
34
35 /** @unknown: MBZ. */
36 u32 unknown[14];
37};
38
39/**
40 * struct panthor_heap_chunk - Structure used to keep track of allocated heap chunks.
41 */
42struct panthor_heap_chunk {
43 /** @node: Used to insert the heap chunk in panthor_heap::chunks. */
44 struct list_head node;
45
46 /** @bo: Buffer object backing the heap chunk. */
47 struct panthor_kernel_bo *bo;
48};
49
50/**
51 * struct panthor_heap - Structure used to manage tiler heap contexts.
52 */
53struct panthor_heap {
54 /** @chunks: List containing all heap chunks allocated so far. */
55 struct list_head chunks;
56
57 /** @lock: Lock protecting insertion in the chunks list. */
58 struct mutex lock;
59
60 /** @chunk_size: Size of each chunk. */
61 u32 chunk_size;
62
63 /** @max_chunks: Maximum number of chunks. */
64 u32 max_chunks;
65
66 /**
67 * @target_in_flight: Number of in-flight render passes after which
68 * we'd let the FW wait for fragment job to finish instead of allocating new chunks.
69 */
70 u32 target_in_flight;
71
72 /** @chunk_count: Number of heap chunks currently allocated. */
73 u32 chunk_count;
74};
75
/* Maximum number of heap contexts in a pool; heap IDs range from 0 to MAX_HEAPS_PER_POOL - 1. */
#define MAX_HEAPS_PER_POOL	128
77
78/**
79 * struct panthor_heap_pool - Pool of heap contexts
80 *
81 * The pool is attached to a panthor_file and can't be shared across processes.
82 */
83struct panthor_heap_pool {
84 /** @refcount: Reference count. */
85 struct kref refcount;
86
87 /** @ptdev: Device. */
88 struct panthor_device *ptdev;
89
90 /** @vm: VM this pool is bound to. */
91 struct panthor_vm *vm;
92
93 /** @lock: Lock protecting access to @xa. */
94 struct rw_semaphore lock;
95
96 /** @xa: Array storing panthor_heap objects. */
97 struct xarray xa;
98
99 /** @gpu_contexts: Buffer object containing the GPU heap contexts. */
100 struct panthor_kernel_bo *gpu_contexts;
101
102 /** @size: Size of all chunks across all heaps in the pool. */
103 atomic_t size;
104};
105
106static int panthor_heap_ctx_stride(struct panthor_device *ptdev)
107{
108 u32 l2_features = ptdev->gpu_info.l2_features;
109 u32 gpu_cache_line_size = GPU_L2_FEATURES_LINE_SIZE(l2_features);
110
111 return ALIGN(HEAP_CONTEXT_SIZE, gpu_cache_line_size);
112}
113
114static int panthor_get_heap_ctx_offset(struct panthor_heap_pool *pool, int id)
115{
116 return panthor_heap_ctx_stride(ptdev: pool->ptdev) * id;
117}
118
119static void *panthor_get_heap_ctx(struct panthor_heap_pool *pool, int id)
120{
121 return pool->gpu_contexts->kmap +
122 panthor_get_heap_ctx_offset(pool, id);
123}
124
125static void panthor_free_heap_chunk(struct panthor_heap_pool *pool,
126 struct panthor_heap *heap,
127 struct panthor_heap_chunk *chunk)
128{
129 mutex_lock(&heap->lock);
130 list_del(entry: &chunk->node);
131 heap->chunk_count--;
132 mutex_unlock(lock: &heap->lock);
133
134 atomic_sub(i: heap->chunk_size, v: &pool->size);
135
136 panthor_kernel_bo_destroy(bo: chunk->bo);
137 kfree(objp: chunk);
138}
139
140static int panthor_alloc_heap_chunk(struct panthor_heap_pool *pool,
141 struct panthor_heap *heap,
142 bool initial_chunk)
143{
144 struct panthor_heap_chunk *chunk;
145 struct panthor_heap_chunk_header *hdr;
146 int ret;
147
148 chunk = kmalloc(sizeof(*chunk), GFP_KERNEL);
149 if (!chunk)
150 return -ENOMEM;
151
152 chunk->bo = panthor_kernel_bo_create(ptdev: pool->ptdev, vm: pool->vm, size: heap->chunk_size,
153 bo_flags: DRM_PANTHOR_BO_NO_MMAP,
154 vm_map_flags: DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
155 PANTHOR_VM_KERNEL_AUTO_VA,
156 name: "Tiler heap chunk");
157 if (IS_ERR(ptr: chunk->bo)) {
158 ret = PTR_ERR(ptr: chunk->bo);
159 goto err_free_chunk;
160 }
161
162 ret = panthor_kernel_bo_vmap(bo: chunk->bo);
163 if (ret)
164 goto err_destroy_bo;
165
166 hdr = chunk->bo->kmap;
167 memset(hdr, 0, sizeof(*hdr));
168
169 if (initial_chunk && !list_empty(head: &heap->chunks)) {
170 struct panthor_heap_chunk *prev_chunk;
171 u64 prev_gpuva;
172
173 prev_chunk = list_first_entry(&heap->chunks,
174 struct panthor_heap_chunk,
175 node);
176
177 prev_gpuva = panthor_kernel_bo_gpuva(bo: prev_chunk->bo);
178 hdr->next = (prev_gpuva & GENMASK_ULL(63, 12)) |
179 (heap->chunk_size >> 12);
180 }
181
182 panthor_kernel_bo_vunmap(bo: chunk->bo);
183
184 mutex_lock(&heap->lock);
185 list_add(new: &chunk->node, head: &heap->chunks);
186 heap->chunk_count++;
187 mutex_unlock(lock: &heap->lock);
188
189 atomic_add(i: heap->chunk_size, v: &pool->size);
190
191 return 0;
192
193err_destroy_bo:
194 panthor_kernel_bo_destroy(bo: chunk->bo);
195
196err_free_chunk:
197 kfree(objp: chunk);
198
199 return ret;
200}
201
202static void panthor_free_heap_chunks(struct panthor_heap_pool *pool,
203 struct panthor_heap *heap)
204{
205 struct panthor_heap_chunk *chunk, *tmp;
206
207 list_for_each_entry_safe(chunk, tmp, &heap->chunks, node)
208 panthor_free_heap_chunk(pool, heap, chunk);
209}
210
211static int panthor_alloc_heap_chunks(struct panthor_heap_pool *pool,
212 struct panthor_heap *heap,
213 u32 chunk_count)
214{
215 int ret;
216 u32 i;
217
218 for (i = 0; i < chunk_count; i++) {
219 ret = panthor_alloc_heap_chunk(pool, heap, initial_chunk: true);
220 if (ret)
221 return ret;
222 }
223
224 return 0;
225}
226
227static int
228panthor_heap_destroy_locked(struct panthor_heap_pool *pool, u32 handle)
229{
230 struct panthor_heap *heap;
231
232 heap = xa_erase(&pool->xa, index: handle);
233 if (!heap)
234 return -EINVAL;
235
236 panthor_free_heap_chunks(pool, heap);
237 mutex_destroy(lock: &heap->lock);
238 kfree(objp: heap);
239 return 0;
240}
241
242/**
243 * panthor_heap_destroy() - Destroy a heap context
244 * @pool: Pool this context belongs to.
245 * @handle: Handle returned by panthor_heap_create().
246 */
247int panthor_heap_destroy(struct panthor_heap_pool *pool, u32 handle)
248{
249 int ret;
250
251 down_write(sem: &pool->lock);
252 ret = panthor_heap_destroy_locked(pool, handle);
253 up_write(sem: &pool->lock);
254
255 return ret;
256}
257
258/**
259 * panthor_heap_create() - Create a heap context
260 * @pool: Pool to instantiate the heap context from.
261 * @initial_chunk_count: Number of chunk allocated at initialization time.
262 * Must be at least 1.
263 * @chunk_size: The size of each chunk. Must be page-aligned and lie in the
264 * [128k:8M] range.
265 * @max_chunks: Maximum number of chunks that can be allocated.
266 * @target_in_flight: Maximum number of in-flight render passes.
267 * @heap_ctx_gpu_va: Pointer holding the GPU address of the allocated heap
268 * context.
269 * @first_chunk_gpu_va: Pointer holding the GPU address of the first chunk
270 * assigned to the heap context.
271 *
272 * Return: a positive handle on success, a negative error otherwise.
273 */
274int panthor_heap_create(struct panthor_heap_pool *pool,
275 u32 initial_chunk_count,
276 u32 chunk_size,
277 u32 max_chunks,
278 u32 target_in_flight,
279 u64 *heap_ctx_gpu_va,
280 u64 *first_chunk_gpu_va)
281{
282 struct panthor_heap *heap;
283 struct panthor_heap_chunk *first_chunk;
284 struct panthor_vm *vm;
285 int ret = 0;
286 u32 id;
287
288 if (initial_chunk_count == 0)
289 return -EINVAL;
290
291 if (initial_chunk_count > max_chunks)
292 return -EINVAL;
293
294 if (!IS_ALIGNED(chunk_size, PAGE_SIZE) ||
295 chunk_size < SZ_128K || chunk_size > SZ_8M)
296 return -EINVAL;
297
298 down_read(sem: &pool->lock);
299 vm = panthor_vm_get(vm: pool->vm);
300 up_read(sem: &pool->lock);
301
302 /* The pool has been destroyed, we can't create a new heap. */
303 if (!vm)
304 return -EINVAL;
305
306 heap = kzalloc(sizeof(*heap), GFP_KERNEL);
307 if (!heap) {
308 ret = -ENOMEM;
309 goto err_put_vm;
310 }
311
312 mutex_init(&heap->lock);
313 INIT_LIST_HEAD(list: &heap->chunks);
314 heap->chunk_size = chunk_size;
315 heap->max_chunks = max_chunks;
316 heap->target_in_flight = target_in_flight;
317
318 ret = panthor_alloc_heap_chunks(pool, heap, chunk_count: initial_chunk_count);
319 if (ret)
320 goto err_free_heap;
321
322 first_chunk = list_first_entry(&heap->chunks,
323 struct panthor_heap_chunk,
324 node);
325 *first_chunk_gpu_va = panthor_kernel_bo_gpuva(bo: first_chunk->bo);
326
327 down_write(sem: &pool->lock);
328 /* The pool has been destroyed, we can't create a new heap. */
329 if (!pool->vm) {
330 ret = -EINVAL;
331 } else {
332 ret = xa_alloc(xa: &pool->xa, id: &id, entry: heap,
333 XA_LIMIT(0, MAX_HEAPS_PER_POOL - 1), GFP_KERNEL);
334 if (!ret) {
335 void *gpu_ctx = panthor_get_heap_ctx(pool, id);
336
337 memset(gpu_ctx, 0, panthor_heap_ctx_stride(pool->ptdev));
338 *heap_ctx_gpu_va = panthor_kernel_bo_gpuva(bo: pool->gpu_contexts) +
339 panthor_get_heap_ctx_offset(pool, id);
340 }
341 }
342 up_write(sem: &pool->lock);
343
344 if (ret)
345 goto err_free_heap;
346
347 panthor_vm_put(vm);
348 return id;
349
350err_free_heap:
351 panthor_free_heap_chunks(pool, heap);
352 mutex_destroy(lock: &heap->lock);
353 kfree(objp: heap);
354
355err_put_vm:
356 panthor_vm_put(vm);
357 return ret;
358}
359
360/**
361 * panthor_heap_return_chunk() - Return an unused heap chunk
362 * @pool: The pool this heap belongs to.
363 * @heap_gpu_va: The GPU address of the heap context.
364 * @chunk_gpu_va: The chunk VA to return.
365 *
366 * This function is used when a chunk allocated with panthor_heap_grow()
367 * couldn't be linked to the heap context through the FW interface because
368 * the group requesting the allocation was scheduled out in the meantime.
369 */
370int panthor_heap_return_chunk(struct panthor_heap_pool *pool,
371 u64 heap_gpu_va,
372 u64 chunk_gpu_va)
373{
374 u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(bo: pool->gpu_contexts);
375 u32 heap_id = (u32)offset / panthor_heap_ctx_stride(ptdev: pool->ptdev);
376 struct panthor_heap_chunk *chunk, *tmp, *removed = NULL;
377 struct panthor_heap *heap;
378 int ret;
379
380 if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
381 return -EINVAL;
382
383 down_read(sem: &pool->lock);
384 heap = xa_load(&pool->xa, index: heap_id);
385 if (!heap) {
386 ret = -EINVAL;
387 goto out_unlock;
388 }
389
390 chunk_gpu_va &= GENMASK_ULL(63, 12);
391
392 mutex_lock(&heap->lock);
393 list_for_each_entry_safe(chunk, tmp, &heap->chunks, node) {
394 if (panthor_kernel_bo_gpuva(bo: chunk->bo) == chunk_gpu_va) {
395 removed = chunk;
396 list_del(entry: &chunk->node);
397 heap->chunk_count--;
398 atomic_sub(i: heap->chunk_size, v: &pool->size);
399 break;
400 }
401 }
402 mutex_unlock(lock: &heap->lock);
403
404 if (removed) {
405 panthor_kernel_bo_destroy(bo: chunk->bo);
406 kfree(objp: chunk);
407 ret = 0;
408 } else {
409 ret = -EINVAL;
410 }
411
412out_unlock:
413 up_read(sem: &pool->lock);
414 return ret;
415}
416
417/**
418 * panthor_heap_grow() - Make a heap context grow.
419 * @pool: The pool this heap belongs to.
420 * @heap_gpu_va: The GPU address of the heap context.
421 * @renderpasses_in_flight: Number of render passes currently in-flight.
422 * @pending_frag_count: Number of fragment jobs waiting for execution/completion.
423 * @new_chunk_gpu_va: Pointer used to return the chunk VA.
424 *
425 * Return:
426 * - 0 if a new heap was allocated
427 * - -ENOMEM if the tiler context reached the maximum number of chunks
428 * or if too many render passes are in-flight
429 * or if the allocation failed
430 * - -EINVAL if any of the arguments passed to panthor_heap_grow() is invalid
431 */
432int panthor_heap_grow(struct panthor_heap_pool *pool,
433 u64 heap_gpu_va,
434 u32 renderpasses_in_flight,
435 u32 pending_frag_count,
436 u64 *new_chunk_gpu_va)
437{
438 u64 offset = heap_gpu_va - panthor_kernel_bo_gpuva(bo: pool->gpu_contexts);
439 u32 heap_id = (u32)offset / panthor_heap_ctx_stride(ptdev: pool->ptdev);
440 struct panthor_heap_chunk *chunk;
441 struct panthor_heap *heap;
442 int ret;
443
444 if (offset > U32_MAX || heap_id >= MAX_HEAPS_PER_POOL)
445 return -EINVAL;
446
447 down_read(sem: &pool->lock);
448 heap = xa_load(&pool->xa, index: heap_id);
449 if (!heap) {
450 ret = -EINVAL;
451 goto out_unlock;
452 }
453
454 /* If we reached the target in-flight render passes, or if we
455 * reached the maximum number of chunks, let the FW figure another way to
456 * find some memory (wait for render passes to finish, or call the exception
457 * handler provided by the userspace driver, if any).
458 */
459 if (renderpasses_in_flight > heap->target_in_flight ||
460 heap->chunk_count >= heap->max_chunks) {
461 ret = -ENOMEM;
462 goto out_unlock;
463 }
464
465 /* FIXME: panthor_alloc_heap_chunk() triggers a kernel BO creation,
466 * which goes through the blocking allocation path. Ultimately, we
467 * want a non-blocking allocation, so we can immediately report to the
468 * FW when the system is running out of memory. In that case, the FW
469 * can call a user-provided exception handler, which might try to free
470 * some tiler memory by issuing an intermediate fragment job. If the
471 * exception handler can't do anything, it will flag the queue as
472 * faulty so the job that triggered this tiler chunk allocation and all
473 * further jobs in this queue fail immediately instead of having to
474 * wait for the job timeout.
475 */
476 ret = panthor_alloc_heap_chunk(pool, heap, initial_chunk: false);
477 if (ret)
478 goto out_unlock;
479
480 chunk = list_first_entry(&heap->chunks,
481 struct panthor_heap_chunk,
482 node);
483 *new_chunk_gpu_va = (panthor_kernel_bo_gpuva(bo: chunk->bo) & GENMASK_ULL(63, 12)) |
484 (heap->chunk_size >> 12);
485 ret = 0;
486
487out_unlock:
488 up_read(sem: &pool->lock);
489 return ret;
490}
491
492static void panthor_heap_pool_release(struct kref *refcount)
493{
494 struct panthor_heap_pool *pool =
495 container_of(refcount, struct panthor_heap_pool, refcount);
496
497 xa_destroy(&pool->xa);
498 kfree(objp: pool);
499}
500
501/**
502 * panthor_heap_pool_put() - Release a heap pool reference
503 * @pool: Pool to release the reference on. Can be NULL.
504 */
505void panthor_heap_pool_put(struct panthor_heap_pool *pool)
506{
507 if (pool)
508 kref_put(kref: &pool->refcount, release: panthor_heap_pool_release);
509}
510
511/**
512 * panthor_heap_pool_get() - Get a heap pool reference
513 * @pool: Pool to get the reference on. Can be NULL.
514 *
515 * Return: @pool.
516 */
517struct panthor_heap_pool *
518panthor_heap_pool_get(struct panthor_heap_pool *pool)
519{
520 if (pool)
521 kref_get(kref: &pool->refcount);
522
523 return pool;
524}
525
526/**
527 * panthor_heap_pool_create() - Create a heap pool
528 * @ptdev: Device.
529 * @vm: The VM this heap pool will be attached to.
530 *
531 * Heap pools might contain up to 128 heap contexts, and are per-VM.
532 *
533 * Return: A valid pointer on success, a negative error code otherwise.
534 */
535struct panthor_heap_pool *
536panthor_heap_pool_create(struct panthor_device *ptdev, struct panthor_vm *vm)
537{
538 size_t bosize = ALIGN(MAX_HEAPS_PER_POOL *
539 panthor_heap_ctx_stride(ptdev),
540 4096);
541 struct panthor_heap_pool *pool;
542 int ret = 0;
543
544 pool = kzalloc(sizeof(*pool), GFP_KERNEL);
545 if (!pool)
546 return ERR_PTR(error: -ENOMEM);
547
548 /* We want a weak ref here: the heap pool belongs to the VM, so we're
549 * sure that, as long as the heap pool exists, the VM exists too.
550 */
551 pool->vm = vm;
552 pool->ptdev = ptdev;
553 init_rwsem(&pool->lock);
554 xa_init_flags(xa: &pool->xa, XA_FLAGS_ALLOC);
555 kref_init(kref: &pool->refcount);
556
557 pool->gpu_contexts = panthor_kernel_bo_create(ptdev, vm, size: bosize,
558 bo_flags: DRM_PANTHOR_BO_NO_MMAP,
559 vm_map_flags: DRM_PANTHOR_VM_BIND_OP_MAP_NOEXEC,
560 PANTHOR_VM_KERNEL_AUTO_VA,
561 name: "Heap pool");
562 if (IS_ERR(ptr: pool->gpu_contexts)) {
563 ret = PTR_ERR(ptr: pool->gpu_contexts);
564 goto err_destroy_pool;
565 }
566
567 ret = panthor_kernel_bo_vmap(bo: pool->gpu_contexts);
568 if (ret)
569 goto err_destroy_pool;
570
571 atomic_add(i: pool->gpu_contexts->obj->size, v: &pool->size);
572
573 return pool;
574
575err_destroy_pool:
576 panthor_heap_pool_destroy(pool);
577 return ERR_PTR(error: ret);
578}
579
580/**
581 * panthor_heap_pool_destroy() - Destroy a heap pool.
582 * @pool: Pool to destroy.
583 *
584 * This function destroys all heap contexts and their resources. Thus
585 * preventing any use of the heap context or the chunk attached to them
586 * after that point.
587 *
588 * If the GPU still has access to some heap contexts, a fault should be
589 * triggered, which should flag the command stream groups using these
590 * context as faulty.
591 *
592 * The heap pool object is only released when all references to this pool
593 * are released.
594 */
595void panthor_heap_pool_destroy(struct panthor_heap_pool *pool)
596{
597 struct panthor_heap *heap;
598 unsigned long i;
599
600 if (!pool)
601 return;
602
603 down_write(sem: &pool->lock);
604 xa_for_each(&pool->xa, i, heap)
605 drm_WARN_ON(&pool->ptdev->base, panthor_heap_destroy_locked(pool, i));
606
607 if (!IS_ERR_OR_NULL(ptr: pool->gpu_contexts)) {
608 atomic_sub(i: pool->gpu_contexts->obj->size, v: &pool->size);
609 panthor_kernel_bo_destroy(bo: pool->gpu_contexts);
610 }
611
612 /* Reflects the fact the pool has been destroyed. */
613 pool->vm = NULL;
614 up_write(sem: &pool->lock);
615
616 panthor_heap_pool_put(pool);
617}
618
619/**
620 * panthor_heap_pool_size() - Get a heap pool's total size
621 * @pool: Pool whose total chunks size to return
622 *
623 * Returns the aggregated size of all chunks for all heaps in the pool
624 *
625 */
626size_t panthor_heap_pool_size(struct panthor_heap_pool *pool)
627{
628 if (!pool)
629 return 0;
630
631 return atomic_read(v: &pool->size);
632}
633

/* Source: linux/drivers/gpu/drm/panthor/panthor_heap.c */