// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "xe_bb.h"

#include "instructions/xe_mi_commands.h"
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_exec_queue_types.h"
#include "xe_gt.h"
#include "xe_hw_fence.h"
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_vm_types.h"

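/*
 * Extra space reserved past the end of the batch buffer so that the
 * command streamer's hardware prefetch never reads beyond the
 * suballocation.
 */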
static int bb_prefetch(struct xe_gt *gt)
{
	struct xe_device *xe = gt_to_xe(gt);

	if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
		/*
		 * RCS and CCS require 1K, although other engines would be
		 * okay with 512.
		 */
		return SZ_1K;
	else
		return SZ_512;
}

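/**
 * xe_bb_new() - Allocate a batch buffer from a suballocator pool
 * @gt: the &xe_gt the batch buffer will be executed on
 * @dwords: number of dwords of instruction space the caller needs
 * @usm: if true, allocate from the GT's USM pool instead of the tile's
 *       kernel batch buffer pool
 *
 * The suballocation is sized for @dwords plus a trailing
 * MI_BATCH_BUFFER_END plus the platform's hardware prefetch padding.
 *
 * Return: pointer to the new &struct xe_bb, or an ERR_PTR() on failure.
 */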
struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm)
{
	struct xe_tile *tile = gt_to_tile(gt);
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords,
	 * one additional MI_BATCH_BUFFER_END dword, and additional buffer
	 * space to accommodate the platform-specific hardware prefetch
	 * requirements.
	 */
	bb->bo = xe_sa_bo_new(!usm ? tile->mem.kernel_bb_pool : gt->usm.bb_pool,
			      4 * (dwords + 1) + bb_prefetch(gt));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

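/**
 * xe_bb_ccs_new() - Allocate a batch buffer from a VF CCS pool
 * @gt: the &xe_gt the batch buffer will be executed on
 * @dwords: number of dwords of instruction space the caller needs
 * @ctx_id: the SR-IOV VF CCS read/write context whose pool to allocate from
 *
 * Like xe_bb_new(), but allocates from the CCS pool of @ctx_id and
 * reserves a single guard dword instead of prefetch padding.
 *
 * Return: pointer to the new &struct xe_bb, or an ERR_PTR() on failure.
 */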
struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
			    enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
{
	struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
	struct xe_device *xe = gt_to_xe(gt);
	struct xe_sa_manager *bb_pool;
	int err;

	if (!bb)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to allocate space for the requested number of dwords and
	 * one additional MI_BATCH_BUFFER_END dword. Since the whole SA is
	 * submitted to the HW, we need to make sure that the last
	 * instruction is not overwritten when the last chunk of the SA is
	 * allocated for a BB. This extra dword acts as a guard here.
	 */
	bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
	bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
	if (IS_ERR(bb->bo)) {
		err = PTR_ERR(bb->bo);
		goto err;
	}

	bb->cs = xe_sa_bo_cpu_addr(bb->bo);
	bb->len = 0;

	return bb;
err:
	kfree(bb);
	return ERR_PTR(err);
}

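/*
 * Terminate the batch buffer with MI_BATCH_BUFFER_END if not already
 * present, flush the CPU writes to the suballocation, and create a
 * scheduler job that will execute at the given batch address(es).
 */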
static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
	u32 size = drm_suballoc_size(bb->bo);

	if (bb->len == 0 || bb->cs[bb->len - 1] != MI_BATCH_BUFFER_END)
		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;

	xe_gt_assert(q->gt, bb->len * 4 + bb_prefetch(q->gt) <= size);

	xe_sa_bo_flush_write(bb->bo);

	return xe_sched_job_create(q, addr);
}

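/**
 * xe_bb_create_migration_job() - Create a migration job from a batch buffer
 * @q: the migration exec queue the job will be submitted to
 * @bb: the batch buffer
 * @batch_base_ofs: GPU offset at which the suballocator pool is mapped
 * @second_idx: dword offset into @bb at which the second batch part begins
 *
 * Migration jobs take two batch addresses, both relative to
 * @batch_base_ofs: the start of @bb, and the instruction @second_idx
 * dwords in.
 *
 * Return: pointer to the new &struct xe_sched_job, or an ERR_PTR() on failure.
 */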
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
						struct xe_bb *bb,
						u64 batch_base_ofs,
						u32 second_idx)
{
	u64 addr[2] = {
		batch_base_ofs + drm_suballoc_soffset(bb->bo),
		batch_base_ofs + drm_suballoc_soffset(bb->bo) +
		4 * second_idx,
	};

	xe_gt_assert(q->gt, second_idx <= bb->len);
	xe_gt_assert(q->gt, xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);

	return __xe_bb_create_job(q, bb, addr);
}

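/**
 * xe_bb_create_job() - Create a scheduler job from a batch buffer
 * @q: the exec queue the job will be submitted to
 * @bb: the batch buffer
 *
 * The batch executes at the suballocation's GPU virtual address. Not
 * for migration queues; see xe_bb_create_migration_job() for those.
 *
 * Return: pointer to the new &struct xe_sched_job, or an ERR_PTR() on failure.
 */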
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
				      struct xe_bb *bb)
{
	u64 addr = xe_sa_bo_gpu_addr(bb->bo);

	xe_gt_assert(q->gt, !xe_sched_job_is_migration(q));
	xe_gt_assert(q->gt, q->width == 1);
	return __xe_bb_create_job(q, bb, &addr);
}

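/**
 * xe_bb_free() - Free a batch buffer
 * @bb: the batch buffer to free, may be NULL
 * @fence: optional fence; the suballocation is only reused once it signals
 *
 * The suballocation is released back to its pool when @fence signals,
 * or immediately if @fence is NULL.
 */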
void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence)
{
	if (!bb)
		return;

	xe_sa_bo_free(bb->bo, fence);
	kfree(bb);
}