// SPDX-License-Identifier: MIT
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Christian König <christian.koenig@amd.com>
 */

#include <linux/dma-fence-chain.h>

#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"

struct amdgpu_sync_entry {
	struct hlist_node node;
	struct dma_fence *fence;
};

static struct kmem_cache *amdgpu_sync_slab;

/**
 * amdgpu_sync_create - zero init sync object
 *
 * @sync: sync object to initialize
 *
 * Just clear the sync object for now.
 */
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
	hash_init(sync->fences);
}
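
/*
 * Typical lifecycle, as a minimal sketch (illustrative only; the fence and
 * the error handling are assumed to come from the surrounding code):
 *
 *	struct amdgpu_sync sync;
 *	int r;
 *
 *	amdgpu_sync_create(&sync);
 *	r = amdgpu_sync_fence(&sync, fence, GFP_KERNEL);
 *	if (!r)
 *		r = amdgpu_sync_wait(&sync, true);
 *	amdgpu_sync_free(&sync);
 */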

/**
 * amdgpu_sync_same_dev - test if a fence belongs to us
 *
 * @adev: amdgpu device to use for the test
 * @f: fence to test
 *
 * Test if the fence was issued by us.
 */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev,
				 struct dma_fence *f)
{
	struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

	if (s_fence) {
		struct amdgpu_ring *ring;

		ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
		return ring->adev == adev;
	}

	return false;
}

/**
 * amdgpu_sync_get_owner - extract the owner of a fence
 *
 * @f: fence to get the owner from
 *
 * Extract who originally created the fence.
 */
static void *amdgpu_sync_get_owner(struct dma_fence *f)
{
	struct drm_sched_fence *s_fence;
	struct amdgpu_amdkfd_fence *kfd_fence;

	if (!f)
		return AMDGPU_FENCE_OWNER_UNDEFINED;

	s_fence = to_drm_sched_fence(f);
	if (s_fence)
		return s_fence->owner;

	kfd_fence = to_amdgpu_amdkfd_fence(f);
	if (kfd_fence)
		return AMDGPU_FENCE_OWNER_KFD;

	return AMDGPU_FENCE_OWNER_UNDEFINED;
}

/**
 * amdgpu_sync_keep_later - Keep the later fence
 *
 * @keep: existing fence to test
 * @fence: new fence
 *
 * Either keep the existing fence or the new one, depending on which one is
 * later.
 */
static void amdgpu_sync_keep_later(struct dma_fence **keep,
				   struct dma_fence *fence)
{
	if (*keep && dma_fence_is_later(*keep, fence))
		return;

	dma_fence_put(*keep);
	*keep = dma_fence_get(fence);
}

/**
 * amdgpu_sync_add_later - add the fence to the hash
 *
 * @sync: sync object to add the fence to
 * @f: fence to add
 *
 * Tries to add the fence to an existing hash entry. Returns true when an entry
 * was found, false otherwise.
 */
static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f)
{
	struct amdgpu_sync_entry *e;

	hash_for_each_possible(sync->fences, e, node, f->context) {
		/* Signaled entries can simply be reused for the new fence */
		if (dma_fence_is_signaled(e->fence)) {
			dma_fence_put(e->fence);
			e->fence = dma_fence_get(f);
			return true;
		}

		if (likely(e->fence->context == f->context)) {
			amdgpu_sync_keep_later(&e->fence, f);
			return true;
		}
	}
	return false;
}

/**
 * amdgpu_sync_fence - remember to sync to this fence
 *
 * @sync: sync object to add fence to
 * @f: fence to sync to
 * @flags: memory allocation flags to use when allocating sync entry
 *
 * Add the fence to the sync object.
 */
int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f,
		      gfp_t flags)
{
	struct amdgpu_sync_entry *e;

	if (!f)
		return 0;

	if (amdgpu_sync_add_later(sync, f))
		return 0;

	e = kmem_cache_alloc(amdgpu_sync_slab, flags);
	if (!e)
		return -ENOMEM;

	hash_add(sync->fences, &e->node, f->context);
	e->fence = dma_fence_get(f);
	return 0;
}
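
/*
 * Usage sketch (illustrative only; fence_a and fence_b are assumed to come
 * from the caller). Fences from the same context are deduplicated, so callers
 * can simply add everything they depend on:
 *
 *	r = amdgpu_sync_fence(&sync, fence_a, GFP_KERNEL);
 *	if (!r)
 *		r = amdgpu_sync_fence(&sync, fence_b, GFP_KERNEL);
 *
 * If fence_a and fence_b share a fence context, only the later of the two is
 * kept in the hash.
 */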

/* Determine based on the owner and mode if we should sync to a fence or not */
static bool amdgpu_sync_test_fence(struct amdgpu_device *adev,
				   enum amdgpu_sync_mode mode,
				   void *owner, struct dma_fence *f)
{
	void *fence_owner = amdgpu_sync_get_owner(f);

	/* Always sync to moves, no matter what */
	if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED)
		return true;

	/* We only want to trigger KFD eviction fences on
	 * evict or move jobs. Skip KFD fences otherwise.
	 */
	if (fence_owner == AMDGPU_FENCE_OWNER_KFD &&
	    owner != AMDGPU_FENCE_OWNER_UNDEFINED)
		return false;

	/* Never sync to VM updates either. */
	if (fence_owner == AMDGPU_FENCE_OWNER_VM &&
	    owner != AMDGPU_FENCE_OWNER_UNDEFINED &&
	    owner != AMDGPU_FENCE_OWNER_KFD)
		return false;

	/* Ignore fences depending on the sync mode */
	switch (mode) {
	case AMDGPU_SYNC_ALWAYS:
		return true;

	case AMDGPU_SYNC_NE_OWNER:
		if (amdgpu_sync_same_dev(adev, f) &&
		    fence_owner == owner)
			return false;
		break;

	case AMDGPU_SYNC_EQ_OWNER:
		if (amdgpu_sync_same_dev(adev, f) &&
		    fence_owner != owner)
			return false;
		break;

	case AMDGPU_SYNC_EXPLICIT:
		return false;
	}

	WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD,
	     "Adding eviction fence to sync obj");
	return true;
}
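
/*
 * Informal summary of the rules above (a reading aid, not normative):
 *
 * - Fences with an UNDEFINED owner (moves/evictions) are always synced to.
 * - KFD eviction fences are only synced to by jobs with an UNDEFINED owner.
 * - VM update fences are only synced to by jobs with an UNDEFINED or KFD
 *   owner.
 * - On top of that, AMDGPU_SYNC_ALWAYS syncs to everything that is left,
 *   AMDGPU_SYNC_NE_OWNER skips same-device fences from the same owner,
 *   AMDGPU_SYNC_EQ_OWNER skips same-device fences from other owners, and
 *   AMDGPU_SYNC_EXPLICIT skips all remaining fences.
 */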

/**
 * amdgpu_sync_resv - sync to a reservation object
 *
 * @adev: amdgpu device
 * @sync: sync object to add fences from reservation object to
 * @resv: reservation object with embedded fence
 * @mode: how owner affects which fences we sync to
 * @owner: owner of the planned job submission
 *
 * Sync to all fences in the reservation object that match the sync mode and
 * owner.
 */
int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
		     struct dma_resv *resv, enum amdgpu_sync_mode mode,
		     void *owner)
{
	struct dma_resv_iter cursor;
	struct dma_fence *f;
	int r;

	if (resv == NULL)
		return -EINVAL;

	/* Implicitly sync only to KERNEL, WRITE and READ */
	dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
		dma_fence_chain_for_each(f, f) {
			struct dma_fence *tmp = dma_fence_chain_contained(f);

			if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
				r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
				dma_fence_put(f);
				if (r)
					return r;
				break;
			}
		}
	}
	return 0;
}
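
/*
 * Usage sketch (illustrative only; the buffer object and job owner are
 * assumed to come from the surrounding submission code):
 *
 *	r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
 *			     AMDGPU_SYNC_NE_OWNER, owner);
 *	if (r)
 *		goto error;
 */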

/**
 * amdgpu_sync_kfd - sync to KFD fences
 *
 * @sync: sync object to add KFD fences to
 * @resv: reservation object with KFD fences
 *
 * Extract all KFD fences and add them to the sync object.
 */
int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv)
{
	struct dma_resv_iter cursor;
	struct dma_fence *f;
	int r = 0;

	dma_resv_iter_begin(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP);
	dma_resv_for_each_fence_unlocked(&cursor, f) {
		void *fence_owner = amdgpu_sync_get_owner(f);

		if (fence_owner != AMDGPU_FENCE_OWNER_KFD)
			continue;

		r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
		if (r)
			break;
	}
	dma_resv_iter_end(&cursor);

	return r;
}

/* Free the entry back to the slab */
static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
{
	hash_del(&e->node);
	dma_fence_put(e->fence);
	kmem_cache_free(amdgpu_sync_slab, e);
}
| 306 | |
| 307 | /** |
| 308 | * amdgpu_sync_peek_fence - get the next fence not signaled yet |
| 309 | * |
| 310 | * @sync: the sync object |
| 311 | * @ring: optional ring to use for test |
| 312 | * |
| 313 | * Returns the next fence not signaled yet without removing it from the sync |
| 314 | * object. |
| 315 | */ |
struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
					 struct amdgpu_ring *ring)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		struct dma_fence *f = e->fence;
		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);

		if (dma_fence_is_signaled(f)) {
			amdgpu_sync_entry_free(e);
			continue;
		}
		if (ring && s_fence) {
			/* For fences from the same ring it is sufficient
			 * when they are scheduled.
			 */
			if (s_fence->sched == &ring->sched) {
				if (dma_fence_is_signaled(&s_fence->scheduled))
					continue;

				return &s_fence->scheduled;
			}
		}

		return f;
	}

	return NULL;
}

/**
 * amdgpu_sync_get_fence - get the next fence from the sync object
 *
 * @sync: sync object to use
 *
 * Gets and removes the next fence from the sync object that is not signaled
 * yet.
 */
struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {

		f = e->fence;

		hash_del(&e->node);
		kmem_cache_free(amdgpu_sync_slab, e);

		if (!dma_fence_is_signaled(f))
			return f;

		dma_fence_put(f);
	}
	return NULL;
}

/**
 * amdgpu_sync_clone - clone a sync object
 *
 * @source: sync object to clone
 * @clone: pointer to destination sync object
 *
 * Adds references to all unsignaled fences in @source to @clone. Also
 * removes signaled fences from @source while at it.
 */
int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(source->fences, i, tmp, e, node) {
		f = e->fence;
		if (!dma_fence_is_signaled(f)) {
			r = amdgpu_sync_fence(clone, f, GFP_KERNEL);
			if (r)
				return r;
		} else {
			amdgpu_sync_entry_free(e);
		}
	}

	return 0;
}

/**
 * amdgpu_sync_move - move all fences from src to dst
 *
 * @src: source of the fences, empty after function
 * @dst: destination for the fences
 *
 * Moves all fences from source to destination. All fences in destination are
 * freed and source is empty after the function call.
 */
void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst)
{
	unsigned int i;

	amdgpu_sync_free(dst);

	for (i = 0; i < HASH_SIZE(src->fences); ++i)
		hlist_move_list(&src->fences[i], &dst->fences[i]);
}

/**
 * amdgpu_sync_push_to_job - push fences into job
 * @sync: sync object to get the fences from
 * @job: job to push the fences into
 *
 * Add all unsignaled fences from sync to job.
 */
int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	struct dma_fence *f;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		f = e->fence;
		if (dma_fence_is_signaled(f)) {
			amdgpu_sync_entry_free(e);
			continue;
		}

		dma_fence_get(f);
		r = drm_sched_job_add_dependency(&job->base, f);
		if (r) {
			dma_fence_put(f);
			return r;
		}
	}
	return 0;
}
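
/*
 * Usage sketch (illustrative only): instead of waiting on the CPU, the
 * collected fences can be handed to the scheduler as job dependencies:
 *
 *	r = amdgpu_sync_push_to_job(&sync, job);
 *	if (r)
 *		goto error;
 *	amdgpu_sync_free(&sync);
 */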
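/**
 * amdgpu_sync_wait - wait for all fences in the sync object
 *
 * @sync: sync object to wait on
 * @intr: if true, make the wait interruptible
 *
 * Waits for every fence in the sync object to signal, freeing each entry as
 * it completes. Returns 0 on success or the first error from dma_fence_wait().
 */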
int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	int i, r;

	hash_for_each_safe(sync->fences, i, tmp, e, node) {
		r = dma_fence_wait(e->fence, intr);
		if (r)
			return r;

		amdgpu_sync_entry_free(e);
	}

	return 0;
}

/**
 * amdgpu_sync_free - free the sync object
 *
 * @sync: sync object to use
 *
 * Free the sync object.
 */
void amdgpu_sync_free(struct amdgpu_sync *sync)
{
	struct amdgpu_sync_entry *e;
	struct hlist_node *tmp;
	unsigned int i;

	hash_for_each_safe(sync->fences, i, tmp, e, node)
		amdgpu_sync_entry_free(e);
}

/**
 * amdgpu_sync_init - init sync object subsystem
 *
 * Allocate the slab allocator.
 */
int amdgpu_sync_init(void)
{
	amdgpu_sync_slab = KMEM_CACHE(amdgpu_sync_entry, SLAB_HWCACHE_ALIGN);
	if (!amdgpu_sync_slab)
		return -ENOMEM;

	return 0;
}

/**
 * amdgpu_sync_fini - fini sync object subsystem
 *
 * Free the slab allocator.
 */
void amdgpu_sync_fini(void)
{
	kmem_cache_destroy(amdgpu_sync_slab);
}