/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/**
 * DOC: Shader validator for VC4.
 *
 * Since the VC4 has no IOMMU between it and system memory, a user
 * with access to execute shaders could escalate privilege by
 * overwriting system memory (using the VPM write address register in
 * the general-purpose DMA mode) or reading system memory it shouldn't
 * (reading it as a texture, uniform data, or direct-addressed TMU
 * lookup).
 *
 * The shader validator walks over a shader's BO, ensuring that its
 * accesses are appropriately bounded, and recording where texture
 * accesses are made so that we can do relocations for them in the
 * uniform stream.
 *
 * Shader BOs are immutable for their lifetimes (enforced by not
 * allowing mmaps, GEM prime export, or rendering to them from a CL),
 * so this validation is only performed at BO creation time.
 */

#include <drm/drm_print.h>

#include "vc4_drv.h"
#include "vc4_qpu_defines.h"

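/* Number of registers we track live state for: 32 in each of the A and B
 * register files, plus the four accumulators (r0-r3).
 */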
#define LIVE_REG_COUNT (32 + 32 + 4)

struct vc4_shader_validation_state {
	/* Current IP being validated. */
	uint32_t ip;

	/* IP at the end of the BO, do not read shader[max_ip] */
	uint32_t max_ip;

	uint64_t *shader;

	struct vc4_texture_sample_info tmu_setup[2];
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[LIVE_REG_COUNT];
	bool live_max_clamp_regs[LIVE_REG_COUNT];
	uint32_t live_immediates[LIVE_REG_COUNT];

	/* Bitfield of which IPs are used as branch targets.
	 *
	 * Used for validation that the uniform stream is updated at the right
	 * points and clearing the texturing/clamping state.
	 */
	unsigned long *branch_targets;

	/* Set when entering a basic block, and cleared when the uniform
	 * address update is found.  This is used to make sure that we don't
	 * read uniforms when the address is undefined.
	 */
	bool needs_uniform_address_update;

	/* Set when we find a backwards branch.  If the branch is backwards,
	 * the target is probably doing an address reset to read uniforms,
	 * and so we need to be sure that a uniforms address is present in the
	 * stream, even if the shader didn't need to read uniforms in later
	 * basic blocks.
	 */
	bool needs_uniform_address_for_loop;

	/* Set when we find an instruction writing the top half of the
	 * register files.  If we allowed writes to those unusable regs in
	 * a threaded shader, then the clamp validation for the other
	 * shader running on our QPU would be invalid.
	 */
	bool all_registers_used;
};

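/* Maps a write address to an index into our live-register tracking arrays:
 * regfile A occupies slots 0-31, regfile B slots 32-63, and the
 * accumulators r0-r3 slots 64-67.  Returns ~0 for waddrs we don't track.
 */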
static uint32_t
waddr_to_live_reg_index(uint32_t waddr, bool is_b)
{
	if (waddr < 32) {
		if (is_b)
			return 32 + waddr;
		else
			return waddr;
	} else if (waddr <= QPU_W_ACC3) {
		return 64 + waddr - QPU_W_ACC0;
	} else {
		return ~0;
	}
}

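/* Returns the live-register tracking index for the ADD op's first argument,
 * or ~0 if it doesn't come from a register we track (for example, a small
 * immediate in the B slot).
 */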
static uint32_t
raddr_add_a_to_live_reg_index(uint64_t inst)
{
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);

	if (add_a == QPU_MUX_A)
		return raddr_a;
	else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
		return 32 + raddr_b;
	else if (add_a <= QPU_MUX_R3)
		return 64 + add_a;
	else
		return ~0;
}

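/* The top half (regs 16-31) of each regfile is unusable by threaded
 * shaders, so touching it means the shader can only run non-threaded
 * (tracked via all_registers_used above).
 */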
static bool
live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
	       (lri >= 32 + 16 && lri < 32 + 32);
}

static bool
is_tmu_submit(uint32_t waddr)
{
	return (waddr == QPU_W_TMU0_S ||
		waddr == QPU_W_TMU1_S);
}

static bool
is_tmu_write(uint32_t waddr)
{
	return (waddr >= QPU_W_TMU0_S &&
		waddr <= QPU_W_TMU1_B);
}

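/* Appends the TMU setup we've accumulated (the p0-p3 uniform offsets) to the
 * validated shader's sample list for later relocation, then resets the
 * per-TMU state for the next sample.
 */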
static bool
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
		      struct vc4_shader_validation_state *validation_state,
		      int tmu)
{
	uint32_t s = validated_shader->num_texture_samples;
	int i;
	struct vc4_texture_sample_info *temp_samples;

	temp_samples = krealloc(validated_shader->texture_samples,
				(s + 1) * sizeof(*temp_samples),
				GFP_KERNEL);
	if (!temp_samples)
		return false;

	memcpy(&temp_samples[s],
	       &validation_state->tmu_setup[tmu],
	       sizeof(*temp_samples));

	validated_shader->num_texture_samples = s + 1;
	validated_shader->texture_samples = temp_samples;

	for (i = 0; i < 4; i++)
		validation_state->tmu_setup[tmu].p_offset[i] = ~0;

	return true;
}

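/* Validates a write to one of the TMU setup/submit registers.  For
 * direct-addressed lookups (a submit with no prior setup writes), this is
 * where we require the address to be a clamped offset added to a uniform
 * (the UBO base address), which is what bounds the memory read.
 */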
static bool
check_tmu_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_DEBUG("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_DEBUG("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_DEBUG("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_DEBUG("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_DEBUG("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct) {
		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Texturing with undefined uniform address\n");
			return false;
		}

		validated_shader->uniforms_size += 4;
	}

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}

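/* Records that the uniform stream must carry a uniforms-address value at the
 * current position (stored in units of uniforms), so that the real address
 * can be patched in when the uniform stream is relocated at submit time.
 */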
static bool require_uniform_address_uniform(struct vc4_validated_shader_info *validated_shader)
{
	uint32_t o = validated_shader->num_uniform_addr_offsets;
	uint32_t num_uniforms = validated_shader->uniforms_size / 4;

	validated_shader->uniform_addr_offsets =
		krealloc(validated_shader->uniform_addr_offsets,
			 (o + 1) *
			 sizeof(*validated_shader->uniform_addr_offsets),
			 GFP_KERNEL);
	if (!validated_shader->uniform_addr_offsets)
		return false;

	validated_shader->uniform_addr_offsets[o] = num_uniforms;
	validated_shader->num_uniform_addr_offsets++;

	return true;
}

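/* Validates a write to the uniforms address register.  We require the new
 * address to be computed as a uniform (the stream's base address) plus a
 * known immediate offset pointing just past the address uniform itself, so
 * the reset always lands at a known point in our validated stream.
 */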
static bool
validate_uniform_address_write(struct vc4_validated_shader_info *validated_shader,
			       struct vc4_shader_validation_state *validation_state,
			       bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	u32 add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	u32 raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	u32 raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	u32 add_lri = raddr_add_a_to_live_reg_index(inst);
	/* We want our reset to be pointing at whatever uniform follows the
	 * uniforms base address.
	 */
	u32 expected_offset = validated_shader->uniforms_size + 4;

	/* We only support absolute uniform address changes, and we
	 * require that they be in the current basic block before any
	 * of its uniform reads.
	 *
	 * One could potentially emit more efficient QPU code, by
	 * noticing that (say) an if statement does uniform control
	 * flow for all threads and that the if reads the same number
	 * of uniforms on each side.  However, this scheme is easy to
	 * validate so it's all we allow for now.
	 */
	switch (QPU_GET_FIELD(inst, QPU_SIG)) {
	case QPU_SIG_NONE:
	case QPU_SIG_SCOREBOARD_UNLOCK:
	case QPU_SIG_COLOR_LOAD:
	case QPU_SIG_LOAD_TMU0:
	case QPU_SIG_LOAD_TMU1:
		break;
	default:
		DRM_DEBUG("uniforms address change must be "
			  "normal math\n");
		return false;
	}

	if (is_mul || QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
		DRM_DEBUG("Uniform address reset must be an ADD.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_COND_ADD) != QPU_COND_ALWAYS) {
		DRM_DEBUG("Uniform address reset must be unconditional.\n");
		return false;
	}

	if (QPU_GET_FIELD(inst, QPU_PACK) != QPU_PACK_A_NOP &&
	    !(inst & QPU_PM)) {
		DRM_DEBUG("No packing allowed on uniforms reset\n");
		return false;
	}

	if (add_lri == -1) {
		DRM_DEBUG("First argument of uniform address write must be "
			  "an immediate value.\n");
		return false;
	}

	if (validation_state->live_immediates[add_lri] != expected_offset) {
		DRM_DEBUG("Resetting uniforms with offset %db instead of %db\n",
			  validation_state->live_immediates[add_lri],
			  expected_offset);
		return false;
	}

	if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
	    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
		DRM_DEBUG("Second argument of uniform address write must be "
			  "a uniform.\n");
		return false;
	}

	validation_state->needs_uniform_address_update = false;
	validation_state->needs_uniform_address_for_loop = false;
	return require_uniform_address_uniform(validated_shader);
}

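/* Validates the destination of the ADD or MUL half of an instruction,
 * dispatching TMU and uniforms-address writes to their checkers, and tracks
 * which registers hold a known immediate (used by the uniform address reset
 * validation above).
 */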
static bool
check_reg_write(struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	bool is_b = is_mul ^ ws;
	u32 lri = waddr_to_live_reg_index(waddr, is_b);

	if (lri != -1) {
		uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
		uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);

		if (sig == QPU_SIG_LOAD_IMM &&
		    QPU_GET_FIELD(inst, QPU_PACK) == QPU_PACK_A_NOP &&
		    ((is_mul && cond_mul == QPU_COND_ALWAYS) ||
		     (!is_mul && cond_add == QPU_COND_ALWAYS))) {
			validation_state->live_immediates[lri] =
				QPU_GET_FIELD(inst, QPU_LOAD_IMM);
		} else {
			validation_state->live_immediates[lri] = ~0;
		}

		if (live_reg_is_upper_half(lri))
			validation_state->all_registers_used = true;
	}

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		if (is_b) {
			DRM_DEBUG("relative uniforms address change "
				  "unsupported\n");
			return false;
		}

		return validate_uniform_address_write(validated_shader,
						      validation_state,
						      is_mul);

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		return check_tmu_write(validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_DEBUG("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		DRM_DEBUG("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	return true;
}

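/* Tracks the live clamp state that direct TMU addressing relies on: a
 * MAX(x, 0) marks a register as clamped to a minimum of 0, and a following
 * MIN(that, uniform) records in live_min_clamp_offsets which uniform
 * supplied the upper bound.
 */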
static void
track_live_clamps(struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}

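/* Checks both write destinations of a non-branch instruction, then updates
 * the live clamp/immediate tracking for whatever registers it wrote.
 */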
static bool
check_instruction_writes(struct vc4_validated_shader_info *validated_shader,
			 struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	bool ok;

	if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) {
		DRM_DEBUG("ADD and MUL both set up textures\n");
		return false;
	}

	ok = (check_reg_write(validated_shader, validation_state, false) &&
	      check_reg_write(validated_shader, validation_state, true));

	track_live_clamps(validated_shader, validation_state);

	return ok;
}

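/* Branch targets were already fully checked by the vc4_validate_branches()
 * prepass; here we only flag backwards branches (loops) for the uniforms
 * address check, and reject branches that also write a register.
 */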
static bool
check_branch(uint64_t inst,
	     struct vc4_validated_shader_info *validated_shader,
	     struct vc4_shader_validation_state *validation_state,
	     int ip)
{
	int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);

	if ((int)branch_imm < 0)
		validation_state->needs_uniform_address_for_loop = true;

	/* We don't want to have to worry about validation of this, and
	 * there's no need for it.
	 */
	if (waddr_add != QPU_W_NOP || waddr_mul != QPU_W_NOP) {
		DRM_DEBUG("branch instruction at %d wrote a register.\n",
			  validation_state->ip);
		return false;
	}

	return true;
}

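/* Counts the uniforms consumed by this instruction's reads (each uniform
 * read advances the stream by 4 bytes) and flags reads of the upper regfile
 * halves for the threaded-shader check.
 */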
static bool
check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
			struct vc4_shader_validation_state *validation_state)
{
	uint64_t inst = validation_state->shader[validation_state->ip];
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (raddr_a == QPU_R_UNIF ||
	    (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) {
		/* This can't overflow the uint32_t, because we're reading 8
		 * bytes of instruction to increment by 4 here, so we'd
		 * already be OOM.
		 */
		validated_shader->uniforms_size += 4;

		if (validation_state->needs_uniform_address_update) {
			DRM_DEBUG("Uniform read with undefined uniform "
				  "address\n");
			return false;
		}
	}

	if ((raddr_a >= 16 && raddr_a < 32) ||
	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
		validation_state->all_registers_used = true;
	}

	return true;
}

/* Make sure that all branches are absolute and point within the shader, and
 * note their targets for later.
 */
static bool
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
{
	uint32_t max_branch_target = 0;
	int ip;
	int last_branch = -2;

	for (ip = 0; ip < validation_state->max_ip; ip++) {
		uint64_t inst = validation_state->shader[ip];
		int32_t branch_imm = QPU_GET_FIELD(inst, QPU_BRANCH_TARGET);
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
		uint32_t after_delay_ip = ip + 4;
		uint32_t branch_target_ip;

		if (sig == QPU_SIG_PROG_END) {
			/* There are two delay slots after program end is
			 * signaled that are still executed, then we're
			 * finished.  validation_state->max_ip is the
			 * instruction after the last valid instruction in the
			 * program.
			 */
			validation_state->max_ip = ip + 3;
			continue;
		}

		if (sig != QPU_SIG_BRANCH)
			continue;

		if (ip - last_branch < 4) {
			DRM_DEBUG("Branch at %d during delay slots\n", ip);
			return false;
		}
		last_branch = ip;

		if (inst & QPU_BRANCH_REG) {
			DRM_DEBUG("branching from register relative "
				  "not supported\n");
			return false;
		}

		if (!(inst & QPU_BRANCH_REL)) {
			DRM_DEBUG("relative branching required\n");
			return false;
		}

		/* The actual branch target is the instruction after the delay
		 * slots, plus whatever byte offset is in the low 32 bits of
		 * the instruction.  Make sure we're not branching beyond the
		 * end of the shader object.
		 */
		if (branch_imm % sizeof(inst) != 0) {
			DRM_DEBUG("branch target not aligned\n");
			return false;
		}

		branch_target_ip = after_delay_ip + (branch_imm >> 3);
		if (branch_target_ip >= validation_state->max_ip) {
			DRM_DEBUG("Branch at %d outside of shader (ip %d/%d)\n",
				  ip, branch_target_ip,
				  validation_state->max_ip);
			return false;
		}
		set_bit(branch_target_ip, validation_state->branch_targets);

		/* Make sure that the non-branching path is also not outside
		 * the shader.
		 */
		if (after_delay_ip >= validation_state->max_ip) {
			DRM_DEBUG("Branch at %d continues past shader end "
				  "(%d/%d)\n",
				  ip, after_delay_ip, validation_state->max_ip);
			return false;
		}
		set_bit(after_delay_ip, validation_state->branch_targets);
		max_branch_target = max(max_branch_target, after_delay_ip);
	}

	if (max_branch_target > validation_state->max_ip - 3) {
		DRM_DEBUG("Branch landed after QPU_SIG_PROG_END");
		return false;
	}

	return true;
}

/* Resets any known state for the shader, used when we may be branched to from
 * multiple locations in the program (or at shader start).
 */
static void
reset_validation_state(struct vc4_shader_validation_state *validation_state)
{
	int i;

	for (i = 0; i < 8; i++)
		validation_state->tmu_setup[i / 4].p_offset[i % 4] = ~0;

	for (i = 0; i < LIVE_REG_COUNT; i++) {
		validation_state->live_min_clamp_offsets[i] = ~0;
		validation_state->live_max_clamp_regs[i] = false;
		validation_state->live_immediates[i] = ~0;
	}
}

static bool
texturing_in_progress(struct vc4_shader_validation_state *validation_state)
{
	return (validation_state->tmu_write_count[0] != 0 ||
		validation_state->tmu_write_count[1] != 0);
}

static bool
vc4_handle_branch_target(struct vc4_shader_validation_state *validation_state)
{
	uint32_t ip = validation_state->ip;

	if (!test_bit(ip, validation_state->branch_targets))
		return true;

	if (texturing_in_progress(validation_state)) {
		DRM_DEBUG("Branch target landed during TMU setup\n");
		return false;
	}

	/* Reset our live values tracking, since this instruction may have
	 * multiple predecessors.
	 *
	 * One could potentially do analysis to determine that, for
	 * example, all predecessors have a live max clamp in the same
	 * register, but we don't bother with that.
	 */
	reset_validation_state(validation_state);

	/* Since we've entered a basic block from potentially multiple
	 * predecessors, we need the uniforms address to be updated before any
	 * uniforms are read.  We require that after any branch point, the
	 * next uniform to be loaded is a uniform address offset.  That
	 * uniform's offset will be marked by the uniform address register
	 * write validation, or by a one-off check at the end of the program.
	 */
	validation_state->needs_uniform_address_update = true;

	return true;
}

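/* Walks the whole shader BO, validating each instruction, and builds the
 * metadata (uniform stream sizes, texture sample relocations, threading
 * flag) that the submit path needs.  Returns NULL if the shader is rejected.
 */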
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_dma_object *shader_obj)
{
	struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev);
	bool found_shader_end = false;
	int shader_end_ip = 0;
	uint32_t last_thread_switch_ip = -3;
	uint32_t ip;
	struct vc4_validated_shader_info *validated_shader = NULL;
	struct vc4_shader_validation_state validation_state;

	if (WARN_ON_ONCE(vc4->gen > VC4_GEN_4))
		return NULL;

	memset(&validation_state, 0, sizeof(validation_state));
	validation_state.shader = shader_obj->vaddr;
	validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);

	reset_validation_state(&validation_state);

	validation_state.branch_targets =
		kcalloc(BITS_TO_LONGS(validation_state.max_ip),
			sizeof(unsigned long), GFP_KERNEL);
	if (!validation_state.branch_targets)
		goto fail;

	validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
	if (!validated_shader)
		goto fail;

	if (!vc4_validate_branches(&validation_state))
		goto fail;

	for (ip = 0; ip < validation_state.max_ip; ip++) {
		uint64_t inst = validation_state.shader[ip];
		uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

		validation_state.ip = ip;

		if (!vc4_handle_branch_target(&validation_state))
			goto fail;

		if (ip == last_thread_switch_ip + 3) {
			/* Reset r0-r3 live clamp data */
			int i;

			for (i = 64; i < LIVE_REG_COUNT; i++) {
				validation_state.live_min_clamp_offsets[i] = ~0;
				validation_state.live_max_clamp_regs[i] = false;
				validation_state.live_immediates[i] = ~0;
			}
		}

		switch (sig) {
		case QPU_SIG_NONE:
		case QPU_SIG_WAIT_FOR_SCOREBOARD:
		case QPU_SIG_SCOREBOARD_UNLOCK:
		case QPU_SIG_COLOR_LOAD:
		case QPU_SIG_LOAD_TMU0:
		case QPU_SIG_LOAD_TMU1:
		case QPU_SIG_PROG_END:
		case QPU_SIG_SMALL_IMM:
		case QPU_SIG_THREAD_SWITCH:
		case QPU_SIG_LAST_THREAD_SWITCH:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_DEBUG("Bad write at ip %d\n", ip);
				goto fail;
			}

			if (!check_instruction_reads(validated_shader,
						     &validation_state))
				goto fail;

			if (sig == QPU_SIG_PROG_END) {
				found_shader_end = true;
				shader_end_ip = ip;
			}

			if (sig == QPU_SIG_THREAD_SWITCH ||
			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
				validated_shader->is_threaded = true;

				if (ip < last_thread_switch_ip + 3) {
					DRM_DEBUG("Thread switch too soon after "
						  "last switch at ip %d\n", ip);
					goto fail;
				}
				last_thread_switch_ip = ip;
			}

			break;

		case QPU_SIG_LOAD_IMM:
			if (!check_instruction_writes(validated_shader,
						      &validation_state)) {
				DRM_DEBUG("Bad LOAD_IMM write at ip %d\n", ip);
				goto fail;
			}
			break;

		case QPU_SIG_BRANCH:
			if (!check_branch(inst, validated_shader,
					  &validation_state, ip))
				goto fail;

			if (ip < last_thread_switch_ip + 3) {
				DRM_DEBUG("Branch in thread switch at ip %d",
					  ip);
				goto fail;
			}

			break;
		default:
			DRM_DEBUG("Unsupported QPU signal %d at "
				  "instruction %d\n", sig, ip);
			goto fail;
		}

		/* There are two delay slots after program end is signaled
		 * that are still executed, then we're finished.
		 */
		if (found_shader_end && ip == shader_end_ip + 2)
			break;
	}

	if (ip == validation_state.max_ip) {
		DRM_DEBUG("shader failed to terminate before "
			  "shader BO end at %zd\n",
			  shader_obj->base.size);
		goto fail;
	}

	/* Might corrupt other thread */
	if (validated_shader->is_threaded &&
	    validation_state.all_registers_used) {
		DRM_DEBUG("Shader uses threading, but uses the upper "
			  "half of the registers, too\n");
		goto fail;
	}

	/* If we did a backwards branch and we haven't emitted a uniforms
	 * reset since then, we still need the uniforms stream to have the
	 * uniforms address available so that the backwards branch can do its
	 * uniforms reset.
	 *
	 * We could potentially prove that the backwards branch doesn't
	 * contain any uses of uniforms until program exit, but that doesn't
	 * seem to be worth the trouble.
	 */
	if (validation_state.needs_uniform_address_for_loop) {
		if (!require_uniform_address_uniform(validated_shader))
			goto fail;
		validated_shader->uniforms_size += 4;
	}

	/* Again, no chance of integer overflow here because the worst case
	 * scenario is 8 bytes of uniforms plus handles per 8-byte
	 * instruction.
	 */
	validated_shader->uniforms_src_size =
		(validated_shader->uniforms_size +
		 4 * validated_shader->num_texture_samples);

	kfree(validation_state.branch_targets);

	return validated_shader;

fail:
	kfree(validation_state.branch_targets);
	if (validated_shader) {
		kfree(validated_shader->uniform_addr_offsets);
		kfree(validated_shader->texture_samples);
		kfree(validated_shader);
	}
	return NULL;
}