1/*
2 * Copyright 2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * based on nouveau_prime.c
23 *
24 * Authors: Alex Deucher
25 */
26
27/**
28 * DOC: PRIME Buffer Sharing
29 *
30 * The following callback implementations are used for :ref:`sharing GEM buffer
31 * objects between different devices via PRIME <prime_buffer_sharing>`.
32 */
33
34#include "amdgpu.h"
35#include "amdgpu_display.h"
36#include "amdgpu_gem.h"
37#include "amdgpu_dma_buf.h"
38#include "amdgpu_xgmi.h"
39#include "amdgpu_vm.h"
40#include <drm/amdgpu_drm.h>
41#include <drm/ttm/ttm_tt.h>
42#include <linux/dma-buf.h>
43#include <linux/dma-fence-array.h>
44#include <linux/pci-p2pdma.h>
45
46static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops;
47
48/**
49 * dma_buf_attach_adev - Helper to get adev of an attachment
50 *
51 * @attach: attachment
52 *
53 * Returns:
54 * A struct amdgpu_device * if the attaching device is an amdgpu device or
55 * partition, NULL otherwise.
56 */
57static struct amdgpu_device *dma_buf_attach_adev(struct dma_buf_attachment *attach)
58{
59 if (attach->importer_ops == &amdgpu_dma_buf_attach_ops) {
60 struct drm_gem_object *obj = attach->importer_priv;
61 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
62
63 return amdgpu_ttm_adev(bdev: bo->tbo.bdev);
64 }
65
66 return NULL;
67}
68
69/**
70 * amdgpu_dma_buf_attach - &dma_buf_ops.attach implementation
71 *
72 * @dmabuf: DMA-buf where we attach to
73 * @attach: attachment to add
74 *
75 * Add the attachment as user to the exported DMA-buf.
76 */
77static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf,
78 struct dma_buf_attachment *attach)
79{
80 struct amdgpu_device *attach_adev = dma_buf_attach_adev(attach);
81 struct drm_gem_object *obj = dmabuf->priv;
82 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
83 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev);
84 int r;
85
86 /*
87 * Disable peer-to-peer access for DCC-enabled VRAM surfaces on GFX12+.
88 * Such buffers cannot be safely accessed over P2P due to device-local
89 * compression metadata. Fallback to system-memory path instead.
90 * Device supports GFX12 (GC 12.x or newer)
91 * BO was created with the AMDGPU_GEM_CREATE_GFX12_DCC flag
92 *
93 */
94 if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) >= IP_VERSION(12, 0, 0) &&
95 bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC)
96 attach->peer2peer = false;
97
98 if (!amdgpu_dmabuf_is_xgmi_accessible(adev: attach_adev, bo) &&
99 pci_p2pdma_distance(provider: adev->pdev, client: attach->dev, verbose: false) < 0)
100 attach->peer2peer = false;
101
102 r = dma_resv_lock(obj: bo->tbo.base.resv, NULL);
103 if (r)
104 return r;
105
106 amdgpu_vm_bo_update_shared(bo);
107
108 dma_resv_unlock(obj: bo->tbo.base.resv);
109
110 return 0;
111}
112
113/**
114 * amdgpu_dma_buf_pin - &dma_buf_ops.pin implementation
115 *
116 * @attach: attachment to pin down
117 *
118 * Pin the BO which is backing the DMA-buf so that it can't move any more.
119 */
120static int amdgpu_dma_buf_pin(struct dma_buf_attachment *attach)
121{
122 struct dma_buf *dmabuf = attach->dmabuf;
123 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dmabuf->priv);
124 u32 domains = bo->allowed_domains;
125
126 dma_resv_assert_held(dmabuf->resv);
127
128 /* Try pinning into VRAM to allow P2P with RDMA NICs without ODP
129 * support if all attachments can do P2P. If any attachment can't do
130 * P2P just pin into GTT instead.
131 *
132 * To avoid with conflicting pinnings between GPUs and RDMA when move
133 * notifiers are disabled, only allow pinning in VRAM when move
134 * notiers are enabled.
135 */
136 if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
137 domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
138 } else {
139 list_for_each_entry(attach, &dmabuf->attachments, node)
140 if (!attach->peer2peer)
141 domains &= ~AMDGPU_GEM_DOMAIN_VRAM;
142 }
143
144 if (domains & AMDGPU_GEM_DOMAIN_VRAM)
145 bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
146
147 if (WARN_ON(!domains))
148 return -EINVAL;
149
150 return amdgpu_bo_pin(bo, domain: domains);
151}
152
153/**
154 * amdgpu_dma_buf_unpin - &dma_buf_ops.unpin implementation
155 *
156 * @attach: attachment to unpin
157 *
158 * Unpin a previously pinned BO to make it movable again.
159 */
160static void amdgpu_dma_buf_unpin(struct dma_buf_attachment *attach)
161{
162 struct drm_gem_object *obj = attach->dmabuf->priv;
163 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
164
165 amdgpu_bo_unpin(bo);
166}
167
168/**
169 * amdgpu_dma_buf_map - &dma_buf_ops.map_dma_buf implementation
170 * @attach: DMA-buf attachment
171 * @dir: DMA direction
172 *
173 * Makes sure that the shared DMA buffer can be accessed by the target device.
174 * For now, simply pins it to the GTT domain, where it should be accessible by
175 * all DMA devices.
176 *
177 * Returns:
178 * sg_table filled with the DMA addresses to use or ERR_PRT with negative error
179 * code.
180 */
181static struct sg_table *amdgpu_dma_buf_map(struct dma_buf_attachment *attach,
182 enum dma_data_direction dir)
183{
184 struct dma_buf *dma_buf = attach->dmabuf;
185 struct drm_gem_object *obj = dma_buf->priv;
186 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
187 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev);
188 struct sg_table *sgt;
189 long r;
190
191 if (!bo->tbo.pin_count) {
192 /* move buffer into GTT or VRAM */
193 struct ttm_operation_ctx ctx = { false, false };
194 unsigned int domains = AMDGPU_GEM_DOMAIN_GTT;
195
196 if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM &&
197 attach->peer2peer) {
198 bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
199 domains |= AMDGPU_GEM_DOMAIN_VRAM;
200 }
201 amdgpu_bo_placement_from_domain(abo: bo, domain: domains);
202 r = ttm_bo_validate(bo: &bo->tbo, placement: &bo->placement, ctx: &ctx);
203 if (r)
204 return ERR_PTR(error: r);
205 }
206
207 switch (bo->tbo.resource->mem_type) {
208 case TTM_PL_TT:
209 sgt = drm_prime_pages_to_sg(dev: obj->dev,
210 pages: bo->tbo.ttm->pages,
211 nr_pages: bo->tbo.ttm->num_pages);
212 if (IS_ERR(ptr: sgt))
213 return sgt;
214
215 if (dma_map_sgtable(dev: attach->dev, sgt, dir,
216 DMA_ATTR_SKIP_CPU_SYNC))
217 goto error_free;
218 break;
219
220 case TTM_PL_VRAM:
221 /* XGMI-accessible memory should never be DMA-mapped */
222 if (WARN_ON(amdgpu_dmabuf_is_xgmi_accessible(
223 dma_buf_attach_adev(attach), bo)))
224 return ERR_PTR(error: -EINVAL);
225
226 r = amdgpu_vram_mgr_alloc_sgt(adev, mem: bo->tbo.resource, offset: 0,
227 size: bo->tbo.base.size, dev: attach->dev,
228 dir, sgt: &sgt);
229 if (r)
230 return ERR_PTR(error: r);
231 break;
232 default:
233 return ERR_PTR(error: -EINVAL);
234 }
235
236 return sgt;
237
238error_free:
239 sg_free_table(sgt);
240 kfree(objp: sgt);
241 return ERR_PTR(error: -EBUSY);
242}
243
244/**
245 * amdgpu_dma_buf_unmap - &dma_buf_ops.unmap_dma_buf implementation
246 * @attach: DMA-buf attachment
247 * @sgt: sg_table to unmap
248 * @dir: DMA direction
249 *
250 * This is called when a shared DMA buffer no longer needs to be accessible by
251 * another device. For now, simply unpins the buffer from GTT.
252 */
253static void amdgpu_dma_buf_unmap(struct dma_buf_attachment *attach,
254 struct sg_table *sgt,
255 enum dma_data_direction dir)
256{
257 if (sg_page(sg: sgt->sgl)) {
258 dma_unmap_sgtable(dev: attach->dev, sgt, dir, attrs: 0);
259 sg_free_table(sgt);
260 kfree(objp: sgt);
261 } else {
262 amdgpu_vram_mgr_free_sgt(dev: attach->dev, dir, sgt);
263 }
264}
265
266/**
267 * amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
268 * @dma_buf: Shared DMA buffer
269 * @direction: Direction of DMA transfer
270 *
271 * This is called before CPU access to the shared DMA buffer's memory. If it's
272 * a read access, the buffer is moved to the GTT domain if possible, for optimal
273 * CPU read performance.
274 *
275 * Returns:
276 * 0 on success or a negative error code on failure.
277 */
278static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
279 enum dma_data_direction direction)
280{
281 struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
282 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev);
283 struct ttm_operation_ctx ctx = { true, false };
284 u32 domain = amdgpu_display_supported_domains(adev, bo_flags: bo->flags);
285 int ret;
286 bool reads = (direction == DMA_BIDIRECTIONAL ||
287 direction == DMA_FROM_DEVICE);
288
289 if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
290 return 0;
291
292 /* move to gtt */
293 ret = amdgpu_bo_reserve(bo, no_intr: false);
294 if (unlikely(ret != 0))
295 return ret;
296
297 if (!bo->tbo.pin_count &&
298 (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
299 amdgpu_bo_placement_from_domain(abo: bo, AMDGPU_GEM_DOMAIN_GTT);
300 ret = ttm_bo_validate(bo: &bo->tbo, placement: &bo->placement, ctx: &ctx);
301 }
302
303 amdgpu_bo_unreserve(bo);
304 return ret;
305}
306
307static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
308{
309 struct drm_gem_object *obj = dma_buf->priv;
310 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
311 int ret;
312
313 /*
314 * Pin to keep buffer in place while it's vmap'ed. The actual
315 * domain is not that important as long as it's mapable. Using
316 * GTT and VRAM should be compatible with most use cases.
317 */
318 ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
319 if (ret)
320 return ret;
321 ret = drm_gem_dmabuf_vmap(dma_buf, map);
322 if (ret)
323 amdgpu_bo_unpin(bo);
324
325 return ret;
326}
327
328static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
329{
330 struct drm_gem_object *obj = dma_buf->priv;
331 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
332
333 drm_gem_dmabuf_vunmap(dma_buf, map);
334 amdgpu_bo_unpin(bo);
335}
336
337const struct dma_buf_ops amdgpu_dmabuf_ops = {
338 .attach = amdgpu_dma_buf_attach,
339 .pin = amdgpu_dma_buf_pin,
340 .unpin = amdgpu_dma_buf_unpin,
341 .map_dma_buf = amdgpu_dma_buf_map,
342 .unmap_dma_buf = amdgpu_dma_buf_unmap,
343 .release = drm_gem_dmabuf_release,
344 .begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
345 .mmap = drm_gem_dmabuf_mmap,
346 .vmap = amdgpu_dma_buf_vmap,
347 .vunmap = amdgpu_dma_buf_vunmap,
348};
349
350/**
351 * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
352 * @gobj: GEM BO
353 * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
354 *
355 * The main work is done by the &drm_gem_prime_export helper.
356 *
357 * Returns:
358 * Shared DMA buffer representing the GEM BO from the given device.
359 */
360struct dma_buf *amdgpu_gem_prime_export(struct drm_gem_object *gobj,
361 int flags)
362{
363 struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
364 struct dma_buf *buf;
365 struct ttm_operation_ctx ctx = {
366 .interruptible = true,
367 .no_wait_gpu = true,
368 /* We opt to avoid OOM on system pages allocations */
369 .gfp_retry_mayfail = true,
370 .allow_res_evict = false,
371 };
372 int ret;
373
374 if (amdgpu_ttm_tt_get_usermm(ttm: bo->tbo.ttm) ||
375 bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
376 return ERR_PTR(error: -EPERM);
377
378 ret = ttm_bo_setup_export(bo: &bo->tbo, ctx: &ctx);
379 if (ret)
380 return ERR_PTR(error: ret);
381
382 buf = drm_gem_prime_export(obj: gobj, flags);
383 if (!IS_ERR(ptr: buf))
384 buf->ops = &amdgpu_dmabuf_ops;
385
386 return buf;
387}
388
389/**
390 * amdgpu_dma_buf_create_obj - create BO for DMA-buf import
391 *
392 * @dev: DRM device
393 * @dma_buf: DMA-buf
394 *
395 * Creates an empty SG BO for DMA-buf import.
396 *
397 * Returns:
398 * A new GEM BO of the given DRM device, representing the memory
399 * described by the given DMA-buf attachment and scatter/gather table.
400 */
401static struct drm_gem_object *
402amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
403{
404 struct dma_resv *resv = dma_buf->resv;
405 struct amdgpu_device *adev = drm_to_adev(ddev: dev);
406 struct drm_gem_object *gobj;
407 struct amdgpu_bo *bo;
408 uint64_t flags = 0;
409 int ret;
410
411 dma_resv_lock(obj: resv, NULL);
412
413 if (dma_buf->ops == &amdgpu_dmabuf_ops) {
414 struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
415
416 flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
417 AMDGPU_GEM_CREATE_COHERENT |
418 AMDGPU_GEM_CREATE_EXT_COHERENT |
419 AMDGPU_GEM_CREATE_UNCACHED);
420 }
421
422 ret = amdgpu_gem_object_create(adev, size: dma_buf->size, PAGE_SIZE,
423 AMDGPU_GEM_DOMAIN_CPU, flags,
424 type: ttm_bo_type_sg, resv, obj: &gobj, xcp_id_plus1: 0);
425 if (ret)
426 goto error;
427
428 bo = gem_to_amdgpu_bo(gobj);
429 bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
430 bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT;
431
432 dma_resv_unlock(obj: resv);
433 return gobj;
434
435error:
436 dma_resv_unlock(obj: resv);
437 return ERR_PTR(error: ret);
438}
439
440/**
441 * amdgpu_dma_buf_move_notify - &attach.move_notify implementation
442 *
443 * @attach: the DMA-buf attachment
444 *
445 * Invalidate the DMA-buf attachment, making sure that the we re-create the
446 * mapping before the next use.
447 */
448static void
449amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach)
450{
451 struct drm_gem_object *obj = attach->importer_priv;
452 struct ww_acquire_ctx *ticket = dma_resv_locking_ctx(obj: obj->resv);
453 struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
454 struct amdgpu_device *adev = amdgpu_ttm_adev(bdev: bo->tbo.bdev);
455 struct ttm_operation_ctx ctx = { false, false };
456 struct ttm_placement placement = {};
457 struct amdgpu_vm_bo_base *bo_base;
458 int r;
459
460 /* FIXME: This should be after the "if", but needs a fix to make sure
461 * DMABuf imports are initialized in the right VM list.
462 */
463 amdgpu_vm_bo_invalidate(bo, evicted: false);
464 if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM)
465 return;
466
467 r = ttm_bo_validate(bo: &bo->tbo, placement: &placement, ctx: &ctx);
468 if (r) {
469 DRM_ERROR("Failed to invalidate DMA-buf import (%d))\n", r);
470 return;
471 }
472
473 for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) {
474 struct amdgpu_vm *vm = bo_base->vm;
475 struct dma_resv *resv = vm->root.bo->tbo.base.resv;
476
477 if (ticket) {
478 /* When we get an error here it means that somebody
479 * else is holding the VM lock and updating page tables
480 * So we can just continue here.
481 */
482 r = dma_resv_lock(obj: resv, ctx: ticket);
483 if (r)
484 continue;
485
486 } else {
487 /* TODO: This is more problematic and we actually need
488 * to allow page tables updates without holding the
489 * lock.
490 */
491 if (!dma_resv_trylock(obj: resv))
492 continue;
493 }
494
495 /* Reserve fences for two SDMA page table updates */
496 r = dma_resv_reserve_fences(obj: resv, num_fences: 2);
497 if (!r)
498 r = amdgpu_vm_clear_freed(adev, vm, NULL);
499 if (!r)
500 r = amdgpu_vm_handle_moved(adev, vm, ticket);
501
502 if (r && r != -EBUSY)
503 DRM_ERROR("Failed to invalidate VM page tables (%d))\n",
504 r);
505
506 dma_resv_unlock(obj: resv);
507 }
508}
509
510static const struct dma_buf_attach_ops amdgpu_dma_buf_attach_ops = {
511 .allow_peer2peer = true,
512 .move_notify = amdgpu_dma_buf_move_notify
513};
514
515/**
516 * amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
517 * @dev: DRM device
518 * @dma_buf: Shared DMA buffer
519 *
520 * Import a dma_buf into a the driver and potentially create a new GEM object.
521 *
522 * Returns:
523 * GEM BO representing the shared DMA buffer for the given device.
524 */
525struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
526 struct dma_buf *dma_buf)
527{
528 struct dma_buf_attachment *attach;
529 struct drm_gem_object *obj;
530
531 if (dma_buf->ops == &amdgpu_dmabuf_ops) {
532 obj = dma_buf->priv;
533 if (obj->dev == dev) {
534 /*
535 * Importing dmabuf exported from out own gem increases
536 * refcount on gem itself instead of f_count of dmabuf.
537 */
538 drm_gem_object_get(obj);
539 return obj;
540 }
541 }
542
543 obj = amdgpu_dma_buf_create_obj(dev, dma_buf);
544 if (IS_ERR(ptr: obj))
545 return obj;
546
547 attach = dma_buf_dynamic_attach(dmabuf: dma_buf, dev: dev->dev,
548 importer_ops: &amdgpu_dma_buf_attach_ops, importer_priv: obj);
549 if (IS_ERR(ptr: attach)) {
550 drm_gem_object_put(obj);
551 return ERR_CAST(ptr: attach);
552 }
553
554 get_dma_buf(dmabuf: dma_buf);
555 obj->import_attach = attach;
556 return obj;
557}
558
559/**
560 * amdgpu_dmabuf_is_xgmi_accessible - Check if xgmi available for P2P transfer
561 *
562 * @adev: amdgpu_device pointer of the importer
563 * @bo: amdgpu buffer object
564 *
565 * Returns:
566 * True if dmabuf accessible over xgmi, false otherwise.
567 */
568bool amdgpu_dmabuf_is_xgmi_accessible(struct amdgpu_device *adev,
569 struct amdgpu_bo *bo)
570{
571 struct drm_gem_object *obj = &bo->tbo.base;
572 struct drm_gem_object *gobj;
573
574 if (!adev)
575 return false;
576
577 if (drm_gem_is_imported(obj)) {
578 struct dma_buf *dma_buf = obj->import_attach->dmabuf;
579
580 if (dma_buf->ops != &amdgpu_dmabuf_ops)
581 /* No XGMI with non AMD GPUs */
582 return false;
583
584 gobj = dma_buf->priv;
585 bo = gem_to_amdgpu_bo(gobj);
586 }
587
588 if (amdgpu_xgmi_same_hive(adev, bo_adev: amdgpu_ttm_adev(bdev: bo->tbo.bdev)) &&
589 (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM))
590 return true;
591
592 return false;
593}
594

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c