// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type. If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	if (v3d->ver < V3D_GEN_41)
		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	v3d_init_core(v3d, 0);
}

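/* Requests that the GMP stop accepting new AXI transactions, then
 * waits for its outstanding read/write counts to drain so the unit
 * can be shut down safely.
 */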
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG(v3d->ver), V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS(v3d->ver)) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}

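/* Requests a safe shutdown of the GCA (the bridge in front of the L3
 * cache on V3D 3.x) and waits for it to acknowledge.  A no-op on
 * V3D 4.1+, which has no GCA.
 */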
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= V3D_GEN_41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}

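/* Resets V3D through the GR bridge's SW_INIT register, for platforms
 * without a dedicated reset controller.  Bridge revision 2 uses
 * SW_INIT_0, revision 7 uses SW_INIT_1.
 */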
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}
}

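/* Resets the V3D core, through the platform's reset controller when
 * one was provided and through the GR bridge otherwise, then
 * reapplies the invariant HW state the reset wiped out.
 */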
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	if (v3d->reset)
		reset_control_reset(v3d->reset);
	else
		v3d_reset_by_bridge(v3d);

	v3d_init_hw_state(v3d);
}

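/* Kicks the SMS REE state machine into reset (state 0x4) and polls
 * until it has left the isolating-for-reset and resetting states.
 * A no-op before V3D 7.1, which has no SMS block.
 */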
void
v3d_reset_sms(struct v3d_dev *v3d)
{
	if (v3d->ver < V3D_GEN_71)
		return;

	V3D_SMS_WRITE(V3D_SMS_REE_CS, V3D_SET_FIELD(0x4, V3D_SMS_STATE));

	if (wait_for(!(V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_REE_CS),
				     V3D_SMS_STATE) == V3D_SMS_ISOLATING_FOR_RESET) &&
		     !(V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_REE_CS),
				     V3D_SMS_STATE) == V3D_SMS_RESETTING), 100)) {
		DRM_ERROR("Failed to wait for SMS reset\n");
	}
}

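/* Performs a full reset of the GPU in response to a hang: disables
 * IRQs, quiesces the GCA and SMS, resets the core, then restores the
 * MMU page table and IRQ state and stops any active perfmon whose
 * counters the reset has invalidated.
 */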
void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
		      V3D_CORE_READ(0, V3D_ERR_STAT));
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_irq_disable(v3d);

	v3d_idle_gca(v3d);
	v3d_reset_sms(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

	trace_v3d_reset_end(dev);
}

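/* Flushes the L3 cache through the GCA.  Only needed on V3D 3.x,
 * since 4.1+ has no GCA.  On 3.2 the FLUSH bit must be cleared by
 * hand; on 3.3 it presumably self-clears.
 */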
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < V3D_GEN_41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		if (v3d->ver < V3D_GEN_33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}

/* Invalidates the (read-only) L2C cache. This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	if (v3d->ver >= V3D_GEN_33)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 * However, we do need to make sure we don't try to trigger a
	 * new flush while the L2_CLEAN queue is trying to
	 * synchronously clean after a job.
	 */
	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
	mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion. So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;
	int core = 0;

	trace_v3d_cache_clean_begin(dev);

	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_TMUWCF), 100)) {
		DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
	}

	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_L2TFLS), 100)) {
		DRM_ERROR("Timeout waiting for L2T clean\n");
	}

	mutex_unlock(&v3d->cache_clean_lock);

	trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

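/* Invalidates every GPU cache level a job's reads could hit, ahead
 * of dispatching the job.
 */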
void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in. That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}

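/* One-time GEM setup: per-queue fence and stats state, the allocator
 * for the GPU's virtual address space, the MMU page table backing it,
 * invariant HW state, and the scheduler.
 */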
int
v3d_gem_init(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
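	/* 4MB of page tables: one u32 PTE per 4KB page gives 1M
	 * entries, enough to map the GPU's full 4GB address space.
	 */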
	u32 pt_size = 4096 * 1024;
	int ret, i;

	for (i = 0; i < V3D_MAX_QUEUES; i++) {
		struct v3d_queue_state *queue = &v3d->queue[i];

		queue->fence_context = dma_fence_context_alloc(1);
		memset(&queue->stats, 0, sizeof(queue->stats));
		seqcount_init(&queue->stats.lock);

		spin_lock_init(&queue->queue_lock);
		spin_lock_init(&queue->fence_lock);
	}

	spin_lock_init(&v3d->mm_lock);
	ret = drmm_mutex_init(dev, &v3d->bo_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->reset_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->sched_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->cache_clean_lock);
	if (ret)
		return ret;

	/* Note: We don't allocate address 0. Various bits of HW
	 * treat 0 as special, such as the occlusion query counters
	 * where 0 means "disabled".
	 */
	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

	v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
			       &v3d->pt_paddr,
			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!v3d->pt) {
		drm_mm_takedown(&v3d->mm);
		dev_err(v3d->drm.dev,
			"Failed to allocate page tables. Please ensure you have DMA enabled.\n");
		return -ENOMEM;
	}

	v3d_init_hw_state(v3d);
	v3d_mmu_set_page_table(v3d);

	v3d_gemfs_init(v3d);

	ret = v3d_sched_init(v3d);
	if (ret) {
		drm_mm_takedown(&v3d->mm);
		dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
				  v3d->pt_paddr);
		return ret;
	}

	return 0;
}

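/* Tears down what v3d_gem_init() set up.  Jobs must already have
 * drained; any queue still holding an active job triggers a WARN.
 */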
void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	enum v3d_queue q;

	v3d_sched_fini(v3d);
	v3d_gemfs_fini(v3d);

	/* Waiting for jobs to finish would need to be done before
	 * unregistering V3D.
	 */
	for (q = 0; q < V3D_MAX_QUEUES; q++)
		WARN_ON(v3d->queue[q].active_job);

	drm_mm_takedown(&v3d->mm);

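	/* Size must match the pt_size allocated in v3d_gem_init(). */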
	dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
			  v3d->pt_paddr);
}