// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2014-2018 Broadcom */

#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include <linux/sched/signal.h>
#include <linux/uaccess.h>

#include <drm/drm_managed.h>
#include <drm/drm_print.h>

#include "v3d_drv.h"
#include "v3d_regs.h"
#include "v3d_trace.h"

static void
v3d_init_core(struct v3d_dev *v3d, int core)
{
	/* Set OVRTMUOUT, which means that the texture sampler uniform
	 * configuration's tmu output type field is used, instead of
	 * using the hardware default behavior based on the texture
	 * type. If you want the default behavior, you can still put
	 * "2" in the indirect texture state's output_type field.
	 */
	if (v3d->ver < V3D_GEN_41)
		V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT);

	/* Whenever we flush the L2T cache, we always want to flush
	 * the whole thing.
	 */
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0);
	V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0);
}

/* Sets invariant state for the HW. */
static void
v3d_init_hw_state(struct v3d_dev *v3d)
{
	v3d_init_core(v3d, 0);
}

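/* Asks the GMP to stop accepting new transactions and waits for its
 * outstanding read/write counts to drain and its config interface to
 * go idle, so the core's AXI traffic is quiesced.
 */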
static void
v3d_idle_axi(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_GMP_CFG(v3d->ver), V3D_GMP_CFG_STOP_REQ);

	if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS(v3d->ver)) &
		      (V3D_GMP_STATUS_RD_COUNT_MASK |
		       V3D_GMP_STATUS_WR_COUNT_MASK |
		       V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) {
		DRM_ERROR("Failed to wait for safe GMP shutdown\n");
	}
}

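/* Requests a safe shutdown of the GCA and waits for it to be
 * acknowledged.  V3D 4.1+ has no GCA, so this is a no-op there.
 */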
static void
v3d_idle_gca(struct v3d_dev *v3d)
{
	if (v3d->ver >= V3D_GEN_41)
		return;

	V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN);

	if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) &
		      V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) ==
		     V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) {
		DRM_ERROR("Failed to wait for safe GCA shutdown\n");
	}
}

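/* Resets the core by pulsing the GR bridge's SW_INIT line; which
 * SW_INIT register to use depends on the bridge's major revision.
 */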
static void
v3d_reset_by_bridge(struct v3d_dev *v3d)
{
	int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION);

	if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) {
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0,
				 V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0);

		/* GFXH-1383: The SW_INIT may cause a stray write to address 0
		 * of the unit, so reset it to its power-on value here.
		 */
		V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK);
	} else {
		WARN_ON_ONCE(V3D_GET_FIELD(version,
					   V3D_TOP_GR_BRIDGE_MAJOR) != 7);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1,
				 V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT);
		V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0);
	}
}

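/* Resets the core, using the reset controller when one was provided
 * and falling back to the GR bridge otherwise, and then reprograms
 * the invariant HW state.
 */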
static void
v3d_reset_v3d(struct v3d_dev *v3d)
{
	if (v3d->reset)
		reset_control_reset(v3d->reset);
	else
		v3d_reset_by_bridge(v3d);

	v3d_init_hw_state(v3d);
}

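/* On V3D 7.1+, requests an SMS reset and waits for the SMS state
 * machine to leave the ISOLATING_FOR_RESET and RESETTING states.
 */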
void
v3d_reset_sms(struct v3d_dev *v3d)
{
	if (v3d->ver < V3D_GEN_71)
		return;

	V3D_SMS_WRITE(V3D_SMS_REE_CS, V3D_SET_FIELD(0x4, V3D_SMS_STATE));

	if (wait_for(!(V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_REE_CS),
				     V3D_SMS_STATE) == V3D_SMS_ISOLATING_FOR_RESET) &&
		     !(V3D_GET_FIELD(V3D_SMS_READ(V3D_SMS_REE_CS),
				     V3D_SMS_STATE) == V3D_SMS_RESETTING), 100)) {
		DRM_ERROR("Failed to wait for SMS reset\n");
	}
}

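/* Performs a full reset of the GPU in response to a hang: quiesces
 * the hardware, resets the core, then restores the MMU page table and
 * interrupt state and stops any active performance monitor.
 */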
void
v3d_reset(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;

	DRM_DEV_ERROR(dev->dev, "Resetting GPU for hang.\n");
	DRM_DEV_ERROR(dev->dev, "V3D_ERR_STAT: 0x%08x\n",
		      V3D_CORE_READ(0, V3D_ERR_STAT));
	trace_v3d_reset_begin(dev);

	/* XXX: only needed for safe powerdown, not reset. */
	if (false)
		v3d_idle_axi(v3d, 0);

	v3d_irq_disable(v3d);

	v3d_idle_gca(v3d);
	v3d_reset_sms(v3d);
	v3d_reset_v3d(v3d);

	v3d_mmu_set_page_table(v3d);
	v3d_irq_reset(v3d);

	v3d_perfmon_stop(v3d, v3d->active_perfmon, false);

	trace_v3d_reset_end(dev);
}

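/* Flushes the L3 cache through the GCA.  Only pre-4.1 hardware has a
 * GCA, and on V3D 3.2 the flush bit also has to be cleared again by
 * hand.
 */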
static void
v3d_flush_l3(struct v3d_dev *v3d)
{
	if (v3d->ver < V3D_GEN_41) {
		u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL);

		V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
			      gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH);

		if (v3d->ver < V3D_GEN_33) {
			V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL,
				      gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH);
		}
	}
}

/* Invalidates the (read-only) L2C cache. This was the L2 cache for
 * uniforms and instructions on V3D 3.2.
 */
static void
v3d_invalidate_l2c(struct v3d_dev *v3d, int core)
{
	if (v3d->ver >= V3D_GEN_33)
		return;

	V3D_CORE_WRITE(core, V3D_CTL_L2CACTL,
		       V3D_L2CACTL_L2CCLR |
		       V3D_L2CACTL_L2CENA);
}

/* Invalidates texture L2 cachelines */
static void
v3d_flush_l2t(struct v3d_dev *v3d, int core)
{
	/* While there is a busy bit (V3D_L2TCACTL_L2TFLS), we don't
	 * need to wait for completion before dispatching the job --
	 * L2T accesses will be stalled until the flush has completed.
	 * However, we do need to make sure we don't try to trigger a
	 * new flush while the L2_CLEAN queue is trying to
	 * synchronously clean after a job.
	 */
	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM));
	mutex_unlock(&v3d->cache_clean_lock);
}

/* Cleans texture L1 and L2 cachelines (writing back dirty data).
 *
 * For cleaning, which happens from the CACHE_CLEAN queue after CSD has
 * executed, we need to make sure that the clean is done before
 * signaling job completion. So, we synchronously wait before
 * returning, and we make sure that L2 invalidates don't happen in the
 * meantime to confuse our are-we-done checks.
 */
void
v3d_clean_caches(struct v3d_dev *v3d)
{
	struct drm_device *dev = &v3d->drm;
	int core = 0;

	trace_v3d_cache_clean_begin(dev);

	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF);
	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_TMUWCF), 100)) {
		DRM_ERROR("Timeout waiting for TMU write combiner flush\n");
	}

	mutex_lock(&v3d->cache_clean_lock);
	V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL,
		       V3D_L2TCACTL_L2TFLS |
		       V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAN, V3D_L2TCACTL_FLM));

	if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) &
		       V3D_L2TCACTL_L2TFLS), 100)) {
		DRM_ERROR("Timeout waiting for L2T clean\n");
	}

	mutex_unlock(&v3d->cache_clean_lock);

	trace_v3d_cache_clean_end(dev);
}

/* Invalidates the slice caches. These are read-only caches. */
static void
v3d_invalidate_slices(struct v3d_dev *v3d, int core)
{
	V3D_CORE_WRITE(core, V3D_CTL_SLCACTL,
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) |
		       V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC));
}

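/* Invalidates every cache that could feed a job stale data, from the
 * L3 down through the L2s to the per-slice caches.
 */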
void
v3d_invalidate_caches(struct v3d_dev *v3d)
{
	/* Invalidate the caches from the outside in. That way if
	 * another CL's concurrent use of nearby memory were to pull
	 * an invalidated cacheline back in, we wouldn't leave stale
	 * data in the inner cache.
	 */
	v3d_flush_l3(v3d);
	v3d_invalidate_l2c(v3d, 0);
	v3d_flush_l2t(v3d, 0);
	v3d_invalidate_slices(v3d, 0);
}

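/* One-time GEM setup at driver load: per-queue state, locks, the MMU
 * address space allocator and page table, initial HW state, gemfs,
 * and the scheduler.
 */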
int
v3d_gem_init(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	u32 pt_size = 4096 * 1024;
	int ret, i;

	for (i = 0; i < V3D_MAX_QUEUES; i++) {
		struct v3d_queue_state *queue = &v3d->queue[i];

		queue->fence_context = dma_fence_context_alloc(1);
		memset(&queue->stats, 0, sizeof(queue->stats));
		seqcount_init(&queue->stats.lock);

		spin_lock_init(&queue->queue_lock);
		spin_lock_init(&queue->fence_lock);
	}

	spin_lock_init(&v3d->mm_lock);
	ret = drmm_mutex_init(dev, &v3d->bo_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->reset_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->sched_lock);
	if (ret)
		return ret;
	ret = drmm_mutex_init(dev, &v3d->cache_clean_lock);
	if (ret)
		return ret;

	/* Note: We don't allocate address 0. Various bits of HW
	 * treat 0 as special, such as the occlusion query counters
	 * where 0 means "disabled".
	 */
	drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);

	v3d->pt = dma_alloc_wc(v3d->drm.dev, pt_size,
			       &v3d->pt_paddr,
			       GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
	if (!v3d->pt) {
		drm_mm_takedown(&v3d->mm);
		dev_err(v3d->drm.dev,
			"Failed to allocate page tables. Please ensure you have DMA enabled.\n");
		return -ENOMEM;
	}

	v3d_init_hw_state(v3d);
	v3d_mmu_set_page_table(v3d);

	v3d_gemfs_init(v3d);

	ret = v3d_sched_init(v3d);
	if (ret) {
		drm_mm_takedown(&v3d->mm);
		dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
				  v3d->pt_paddr);
		return ret;
	}

	return 0;
}

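/* Tears down what v3d_gem_init() set up: the scheduler, gemfs, the
 * MMU address space allocator, and the page table.
 */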
void
v3d_gem_destroy(struct drm_device *dev)
{
	struct v3d_dev *v3d = to_v3d_dev(dev);
	enum v3d_queue q;

	v3d_sched_fini(v3d);
	v3d_gemfs_fini(v3d);

	/* Waiting for jobs to finish would need to be done before
	 * unregistering V3D.
	 */
	for (q = 0; q < V3D_MAX_QUEUES; q++)
		WARN_ON(v3d->queue[q].active_job);

	drm_mm_takedown(&v3d->mm);

	dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
			  v3d->pt_paddr);
}