v3d_drv.h source code [linux/drivers/gpu/drm/v3d/v3d_drv.h]

1	// SPDX-License-Identifier: GPL-2.0+
/* Copyright (C) 2015-2018 Broadcom */
3
4	#include <linux/delay.h>
5	#include <linux/mutex.h>
6	#include <linux/spinlock_types.h>
7	#include <linux/workqueue.h>
8
9	#include <drm/drm_encoder.h>
10	#include <drm/drm_gem.h>
11	#include <drm/drm_gem_shmem_helper.h>
12	#include <drm/gpu_scheduler.h>
13
14	#include "v3d_performance_counters.h"
15
16	#include "uapi/drm/v3d_drm.h"
17
/* Forward declarations: these types are only named, never defined, here. */
struct clk;
struct platform_device;
struct reset_control;
21
/* log2 of the V3D MMU page size: 1 << 12 = 4096 bytes. */
#define V3D_MMU_PAGE_SHIFT 12
/* Number of V3D MMU pages contained in one PAGE_SIZE page. */
#define V3D_PAGE_FACTOR (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT)

/* Queue count; presumably V3D_CPU is the last enum v3d_queue value. */
#define V3D_MAX_QUEUES (V3D_CPU + 1)
26
27	static inline char v3d_queue_to_string(enum* v3d_queue queue)
28	{
29	switch (queue) {
30	case V3D_BIN: return "bin";
31	case V3D_RENDER: return "render";
32	case V3D_TFU: return "tfu";
33	case V3D_CSD: return "csd";
34	case V3D_CACHE_CLEAN: return "cache_clean";
35	case V3D_CPU: return "cpu";
36	}
37	return "UNKNOWN";
38	}
39
40	struct v3d_stats {
41	u64 start_ns;
42	u64 enabled_ns;
43	u64 jobs_completed;
44
45	/*
46	* This seqcount is used to protect the access to the GPU stats
47	* variables. It must be used as, while we are reading the stats,
48	* IRQs can happen and the stats can be updated.
49	*/
50	seqcount_t lock;
51	};
52
53	struct v3d_queue_state {
54	struct drm_gpu_scheduler sched;
55
56	u64 fence_context;
57	u64 emit_seqno;
58
59	/ Stores the GPU stats for this queue in the global context. /
60	struct v3d_stats stats;
61
62	/ Currently active job for this queue /
63	struct v3d_job *active_job;
64	spinlock_t queue_lock;
65	/ Protect dma fence for signalling job completion /
66	spinlock_t fence_lock;
67	};
68
69	/ Performance monitor object. The perform lifetime is controlled by userspace*
70	* using perfmon related ioctls. A perfmon can be attached to a submit_cl
71	* request, and when this is the case, HW perf counters will be activated just
72	* before the submit_cl is submitted to the GPU and disabled when the job is
73	* done. This way, only events related to a specific job will be counted.
74	*/
75	struct v3d_perfmon {
76	/ Tracks the number of users of the perfmon, when this counter reaches*
77	* zero the perfmon is destroyed.
78	*/
79	refcount_t refcnt;
80
81	/ Protects perfmon stop, as it can be invoked from multiple places. /
82	struct mutex lock;
83
84	/ Number of counters activated in this perfmon instance*
85	* (should be less than DRM_V3D_MAX_PERF_COUNTERS).
86	*/
87	u8 ncounters;
88
89	/ Events counted by the HW perf counters. /
90	u8 counters[DRM_V3D_MAX_PERF_COUNTERS];
91
92	/ Storage for counter values. Counters are incremented by the*
93	* HW perf counter values every time the perfmon is attached
94	* to a GPU job. This way, perfmon users don't have to
95	* retrieve the results after each job if they want to track
96	* events covering several submissions. Note that counter
97	* values can't be reset, but you can fake a reset by
98	* destroying the perfmon and creating a new one.
99	*/
100	u64 values[] __counted_by(ncounters);
101	};
102
/*
 * V3D hardware generations. Each enumerator's value is the two-digit short
 * form of the version, so generations can be compared numerically.
 */
enum v3d_gen {
	V3D_GEN_33 = 33,
	V3D_GEN_41 = 41,
	V3D_GEN_42 = 42,
	V3D_GEN_71 = 71,
};
109
/* Indices into an IRQ array; V3D_MAX_IRQS is the count, not a real IRQ. */
enum v3d_irq {
	V3D_CORE_IRQ,
	V3D_HUB_IRQ,
	V3D_MAX_IRQS,
};
115
116	struct v3d_dev {
117	struct drm_device drm;
118
119	/ Short representation (e.g. 33, 41) of the V3D tech version /
120	enum v3d_gen ver;
121
122	/ Short representation (e.g. 5, 6) of the V3D tech revision /
123	int rev;
124
125	bool single_irq_line;
126
127	int irq[V3D_MAX_IRQS];
128
129	struct v3d_perfmon_info perfmon_info;
130
131	void __iomem *hub_regs;
132	void __iomem *core_regs[`3`];
133	void __iomem *bridge_regs;
134	void __iomem *gca_regs;
135	void __iomem *sms_regs;
136	struct clk *clk;
137	struct reset_control *reset;
138
139	/ Virtual and DMA addresses of the single shared page table. /
140	volatile u32 *pt;
141	dma_addr_t pt_paddr;
142
143	/ Virtual and DMA addresses of the MMU's scratch page. When*
144	* a read or write is invalid in the MMU, it will be
145	* redirected here.
146	*/
147	void *mmu_scratch;
148	dma_addr_t mmu_scratch_paddr;
149	/ virtual address bits from V3D to the MMU. /
150	int va_width;
151
152	/ Number of V3D cores. /
153	u32 cores;
154
155	/ Allocator managing the address space. All units are in*
156	* number of pages.
157	*/
158	struct drm_mm mm;
159	spinlock_t mm_lock;
160
161	/*
162	* tmpfs instance used for shmem backed objects
163	*/
164	struct vfsmount *gemfs;
165
166	struct work_struct overflow_mem_work;
167
168	struct v3d_queue_state queue[V3D_MAX_QUEUES];
169
170	/ Used to track the active perfmon if any. /
171	struct v3d_perfmon *active_perfmon;
172
173	/ Protects bo_stats /
174	struct mutex bo_lock;
175
176	/ Lock taken when resetting the GPU, to keep multiple*
177	* processes from trying to park the scheduler threads and
178	* reset at once.
179	*/
180	struct mutex reset_lock;
181
182	/ Lock taken when creating and pushing the GPU scheduler*
183	* jobs, to keep the sched-fence seqnos in order.
184	*/
185	struct mutex sched_lock;
186
187	/ Lock taken during a cache clean and when initiating an L2*
188	* flush, to keep L2 flushes from interfering with the
189	* synchronous L2 cleans.
190	*/
191	struct mutex cache_clean_lock;
192
193	struct {
194	u32 num_allocated;
195	u32 pages_allocated;
196	} bo_stats;
197
198	/ To support a performance analysis tool in user space, we require*
199	* a single, globally configured performance monitor (perfmon) for
200	* all jobs.
201	*/
202	struct v3d_perfmon *global_perfmon;
203
204	/ Global reset counter. The counter must be incremented when*
205	* a GPU reset happens. It must be protected by @reset_lock.
206	*/
207	unsigned int reset_counter;
208	};
209
210	static inline struct v3d_dev *
211	to_v3d_dev(struct drm_device *dev)
212	{
213	return container_of(dev, struct v3d_dev, drm);
214	}
215
216	static inline bool
217	v3d_has_csd(struct v3d_dev *v3d)
218	{
219	return v3d->ver >= V3D_GEN_41;
220	}
221
/* Platform device behind @v3d, reached through its embedded DRM device. */
#define v3d_to_pdev(v3d) to_platform_device((v3d)->drm.dev)
223
224	/ The per-fd struct, which tracks the MMU mappings. /
225	struct v3d_file_priv {
226	struct v3d_dev *v3d;
227
228	struct {
229	struct idr idr;
230	struct mutex lock;
231	} perfmon;
232
233	struct drm_sched_entity sched_entity[V3D_MAX_QUEUES];
234
235	/ Stores the GPU stats for a specific queue for this fd. /
236	struct v3d_stats stats[V3D_MAX_QUEUES];
237
238	/ Per-fd reset counter, must be incremented when a job submitted*
239	* by this fd causes a GPU reset. It must be protected by
240	* &struct v3d_dev->reset_lock.
241	*/
242	unsigned int reset_counter;
243	};
244
245	struct v3d_bo {
246	struct drm_gem_shmem_object base;
247
248	struct drm_mm_node node;
249
250	/ List entry for the BO's position in*
251	* v3d_render_job->unref_list
252	*/
253	struct list_head unref_head;
254
255	void *vaddr;
256	};
257
/*
 * Reinterpret a GEM object pointer as the v3d_bo wrapping it.
 *
 * NOTE(review): a plain cast, so it presumably relies on the drm_gem_object
 * sitting at offset 0 of struct v3d_bo (through base) -- confirm against
 * struct drm_gem_shmem_object.
 */
static inline struct v3d_bo *
to_v3d_bo(struct drm_gem_object *bo)
{
	struct v3d_bo *v3d_bo = (struct v3d_bo *)bo;

	return v3d_bo;
}
263
264	struct v3d_fence {
265	struct dma_fence base;
266	struct drm_device *dev;
267	/ v3d seqno for signaled() test /
268	u64 seqno;
269	enum v3d_queue queue;
270	};
271
272	static inline struct v3d_fence *
273	to_v3d_fence(struct dma_fence *fence)
274	{
275	return (struct v3d_fence *)fence;
276	}
277
/*
 * MMIO accessors. Each macro expects a variable named `v3d`, pointing at the
 * device state, to be in scope at the call site.
 *
 * @offset is parenthesized in every accessor so that a compound argument
 * such as `cond ? A : B` is added to the register base as one operand.
 */
#define V3D_READ(offset) readl(v3d->hub_regs + (offset))
#define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + (offset))

#define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + (offset))
#define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + (offset))

#define V3D_GCA_READ(offset) readl(v3d->gca_regs + (offset))
#define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + (offset))

/* NOTE(review): presumably SMS block state codes -- confirm against the register definitions. */
#define V3D_SMS_IDLE 0x0
#define V3D_SMS_ISOLATING_FOR_RESET 0xa
#define V3D_SMS_RESETTING 0xb
#define V3D_SMS_ISOLATING_FOR_POWER_OFF 0xc
#define V3D_SMS_POWER_OFF_STATE 0xd

#define V3D_SMS_READ(offset) readl(v3d->sms_regs + (offset))
#define V3D_SMS_WRITE(offset, val) writel(val, v3d->sms_regs + (offset))

#define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + (offset))
#define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + (offset))
298
299	struct v3d_job {
300	struct drm_sched_job base;
301
302	struct kref refcount;
303
304	struct v3d_dev *v3d;
305
306	/ This is the array of BOs that were looked up at the start*
307	* of submission.
308	*/
309	struct drm_gem_object **bo;
310	u32 bo_count;
311
312	/ v3d fence to be signaled by IRQ handler when the job is complete. /
313	struct dma_fence *irq_fence;
314
315	/ scheduler fence for when the job is considered complete and*
316	* the BO reservations can be released.
317	*/
318	struct dma_fence *done_fence;
319
320	/ Pointer to a performance monitor object if the user requested it,*
321	* NULL otherwise.
322	*/
323	struct v3d_perfmon *perfmon;
324
325	/ File descriptor of the process that submitted the job that could be used*
326	* to collect per-process information about the GPU.
327	*/
328	struct v3d_file_priv *file_priv;
329
330	/ Callback for the freeing of the job on refcount going to 0. /
331	void (free)(struct* kref *ref);
332	};
333
334	struct v3d_bin_job {
335	struct v3d_job base;
336
337	/ GPU virtual addresses of the start/end of the CL job. /
338	u32 start, end;
339
340	u32 timedout_ctca, timedout_ctra;
341
342	/ Corresponding render job, for attaching our overflow memory. /
343	struct v3d_render_job *render;
344
345	/ Submitted tile memory allocation start/size, tile state. /
346	u32 qma, qms, qts;
347	};
348
349	struct v3d_render_job {
350	struct v3d_job base;
351
352	/ GPU virtual addresses of the start/end of the CL job. /
353	u32 start, end;
354
355	u32 timedout_ctca, timedout_ctra;
356
357	/ List of overflow BOs used in the job that need to be*
358	* released once the job is complete.
359	*/
360	struct list_head unref_list;
361	};
362
363	struct v3d_tfu_job {
364	struct v3d_job base;
365
366	struct drm_v3d_submit_tfu args;
367	};
368
369	struct v3d_csd_job {
370	struct v3d_job base;
371
372	u32 timedout_batches;
373
374	struct drm_v3d_submit_csd args;
375	};
376
/* Kinds of CPU job. Numbering starts at 1, so 0 is not a valid type. */
enum v3d_cpu_job_type {
	V3D_CPU_JOB_TYPE_INDIRECT_CSD = 1,
	V3D_CPU_JOB_TYPE_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_RESET_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_COPY_TIMESTAMP_QUERY,
	V3D_CPU_JOB_TYPE_RESET_PERFORMANCE_QUERY,
	V3D_CPU_JOB_TYPE_COPY_PERFORMANCE_QUERY,
};
385
386	struct v3d_timestamp_query {
387	/ Offset of this query in the timestamp BO for its value. /
388	u32 offset;
389
390	/ Syncobj that indicates the timestamp availability /
391	struct drm_syncobj *syncobj;
392	};
393
394	struct v3d_performance_query {
395	/ Performance monitor IDs for this query /
396	u32 *kperfmon_ids;
397
398	/ Syncobj that indicates the query availability /
399	struct drm_syncobj *syncobj;
400	};
401
402	struct v3d_indirect_csd_info {
403	/ Indirect CSD /
404	struct v3d_csd_job *job;
405
406	/ Clean cache job associated to the Indirect CSD job /
407	struct v3d_job *clean_job;
408
409	/ Offset within the BO where the workgroup counts are stored /
410	u32 offset;
411
412	/ Workgroups size /
413	u32 wg_size;
414
415	/ Indices of the uniforms with the workgroup dispatch counts*
416	* in the uniform stream.
417	*/
418	u32 wg_uniform_offsets[`3`];
419
420	/ Indirect BO /
421	struct drm_gem_object *indirect;
422
423	/ Context of the Indirect CSD job /
424	struct ww_acquire_ctx acquire_ctx;
425	};
426
427	struct v3d_timestamp_query_info {
428	struct v3d_timestamp_query *queries;
429
430	u32 count;
431	};
432
433	struct v3d_performance_query_info {
434	struct v3d_performance_query *queries;
435
436	/ Number of performance queries /
437	u32 count;
438
439	/ Number of performance monitors related to that query pool /
440	u32 nperfmons;
441
442	/ Number of performance counters related to that query pool /
443	u32 ncounters;
444	};
445
446	struct v3d_copy_query_results_info {
447	/ Define if should write to buffer using 64 or 32 bits /
448	bool do_64bit;
449
450	/ Define if it can write to buffer even if the query is not available /
451	bool do_partial;
452
453	/ Define if it should write availability bit to buffer /
454	bool availability_bit;
455
456	/ Offset of the copy buffer in the BO /
457	u32 offset;
458
459	/ Stride of the copy buffer in the BO /
460	u32 stride;
461	};
462
463	struct v3d_cpu_job {
464	struct v3d_job base;
465
466	enum v3d_cpu_job_type job_type;
467
468	struct v3d_indirect_csd_info indirect_csd;
469
470	struct v3d_timestamp_query_info timestamp_query;
471
472	struct v3d_copy_query_results_info copy;
473
474	struct v3d_performance_query_info performance_query;
475	};
476
/* Pointer to a function that takes a CPU job and returns nothing. */
typedef void (*v3d_cpu_job_fn)(struct v3d_cpu_job *);
478
/* One output sync entry: wraps a single syncobj pointer. */
struct v3d_submit_outsync {
	struct drm_syncobj *syncobj;
};
482
483	struct v3d_submit_ext {
484	u32 flags;
485	u32 wait_stage;
486
487	u32 in_sync_count;
488	u64 in_syncs;
489
490	u32 out_sync_count;
491	struct v3d_submit_outsync *out_syncs;
492	};
493
/**
 * __wait_for - magic wait macro
 * @OP: statement run on every iteration, before @COND is evaluated
 * @COND: condition being waited for
 * @US: timeout, in microseconds
 * @Wmin: first sleep interval handed to usleep_range(), in microseconds
 * @Wmax: the sleep interval is doubled after each sleep while it is below this
 *
 * Macro to help avoid open coding check/wait/timeout patterns. Note that it's
 * important that we check the condition again after having timed out, since the
 * timeout could be due to preemption or similar and we've never had a chance to
 * check the condition before the timeout.
 *
 * Evaluates to 0 once @COND holds, or to -ETIMEDOUT when the deadline passed
 * and @COND still did not hold on the check that followed.
 */
#define __wait_for(OP, COND, US, Wmin, Wmax) ({ \
	const ktime_t end__ = ktime_add_ns(ktime_get_raw(), 1000ll * (US)); \
	long wait__ = (Wmin); /* recommended min for usleep is 10 us */ \
	int ret__; \
	might_sleep(); \
	for (;;) { \
		const bool expired__ = ktime_after(ktime_get_raw(), end__); \
		OP; \
		/* Guarantee COND check prior to timeout */ \
		barrier(); \
		if (COND) { \
			ret__ = 0; \
			break; \
		} \
		if (expired__) { \
			ret__ = -ETIMEDOUT; \
			break; \
		} \
		usleep_range(wait__, wait__ * 2); \
		if (wait__ < (Wmax)) \
			wait__ <<= 1; \
	} \
	ret__; \
})

/* __wait_for() with no per-iteration operation. */
#define _wait_for(COND, US, Wmin, Wmax) __wait_for(, (COND), (US), (Wmin), \
						   (Wmax))
/* Wait up to MS milliseconds for COND; sleeps start at 10 us, double up to 1000 us. */
#define wait_for(COND, MS) _wait_for((COND), (MS) * 1000, 10, 1000)
530
531	static inline unsigned long nsecs_to_jiffies_timeout(const u64 n)
532	{
533	/ nsecs_to_jiffies64() does not guard against overflow /
534	if ((NSEC_PER_SEC % HZ) != `0` &&
535	div_u64(dividend: n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ)
536	return MAX_JIFFY_OFFSET;
537
538	return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + `1`);
539	}
540
/* v3d_bo.c */
struct drm_gem_object *v3d_create_object(struct drm_device *dev, size_t size);
void v3d_free_object(struct drm_gem_object *gem_obj);
struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
			     size_t size);
void v3d_get_bo_vaddr(struct v3d_bo *bo);
void v3d_put_bo_vaddr(struct v3d_bo *bo);
int v3d_create_bo_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv);
int v3d_wait_bo_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv);
struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev,
						 struct dma_buf_attachment *attach,
						 struct sg_table *sgt);
559
/* v3d_debugfs.c */
void v3d_debugfs_init(struct drm_minor *minor);
562
563	/ v3d_drv.c /
564	void v3d_get_stats(const struct v3d_stats *stats, u64 timestamp,
565	u64 active_runtime, u64 jobs_completed);
566
/* v3d_fence.c */
extern const struct dma_fence_ops v3d_fence_ops;
struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue q);
570
/* v3d_gem.c */
int v3d_gem_init(struct drm_device *dev);
void v3d_gem_destroy(struct drm_device *dev);
void v3d_reset_sms(struct v3d_dev *v3d);
void v3d_reset(struct v3d_dev *v3d);
void v3d_invalidate_caches(struct v3d_dev *v3d);
void v3d_clean_caches(struct v3d_dev *v3d);
578
/* v3d_gemfs.c */
extern bool super_pages;
void v3d_gemfs_init(struct v3d_dev *v3d);
void v3d_gemfs_fini(struct v3d_dev *v3d);
583
/* v3d_submit.c */
void v3d_job_cleanup(struct v3d_job *job);
void v3d_job_put(struct v3d_job *job);
int v3d_submit_cl_ioctl(struct drm_device *dev, void *data,
			struct drm_file *file_priv);
int v3d_submit_tfu_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int v3d_submit_csd_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
int v3d_submit_cpu_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv);
595
/* v3d_irq.c */
int v3d_irq_init(struct v3d_dev *v3d);
void v3d_irq_enable(struct v3d_dev *v3d);
void v3d_irq_disable(struct v3d_dev *v3d);
void v3d_irq_reset(struct v3d_dev *v3d);
601
/* v3d_mmu.c */
int v3d_mmu_flush_all(struct v3d_dev *v3d);
int v3d_mmu_set_page_table(struct v3d_dev *v3d);
void v3d_mmu_insert_ptes(struct v3d_bo *bo);
void v3d_mmu_remove_ptes(struct v3d_bo *bo);
607
/* v3d_sched.c */
void v3d_timestamp_query_info_free(struct v3d_timestamp_query_info *query_info,
				   unsigned int count);
void v3d_performance_query_info_free(struct v3d_performance_query_info *query_info,
				     unsigned int count);
void v3d_job_update_stats(struct v3d_job *job, enum v3d_queue q);
int v3d_sched_init(struct v3d_dev *v3d);
void v3d_sched_fini(struct v3d_dev *v3d);
616
/* v3d_perfmon.c */
void v3d_perfmon_init(struct v3d_dev *v3d);
void v3d_perfmon_get(struct v3d_perfmon *perfmon);
void v3d_perfmon_put(struct v3d_perfmon *perfmon);
void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon);
void v3d_perfmon_stop(struct v3d_dev *v3d, struct v3d_perfmon *perfmon,
		      bool capture);
struct v3d_perfmon *v3d_perfmon_find(struct v3d_file_priv *v3d_priv, int id);
void v3d_perfmon_open_file(struct v3d_file_priv *v3d_priv);
void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv);
int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
			     struct drm_file *file_priv);
int v3d_perfmon_destroy_ioctl(struct drm_device *dev, void *data,
			      struct drm_file *file_priv);
int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file_priv);
int v3d_perfmon_get_counter_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file_priv);
int v3d_perfmon_set_global_ioctl(struct drm_device *dev, void *data,
				 struct drm_file *file_priv);
637
/* v3d_sysfs.c */
int v3d_sysfs_init(struct device *dev);
void v3d_sysfs_destroy(struct device *dev);
641

source code of linux/drivers/gpu/drm/v3d/v3d_drv.h