/* Extracted from the VC4 DRM driver at commit 463873d5. */
1 | /* |
2 | * Copyright © 2014 Broadcom | |
3 | * | |
4 | * Permission is hereby granted, free of charge, to any person obtaining a | |
5 | * copy of this software and associated documentation files (the "Software"), | |
6 | * to deal in the Software without restriction, including without limitation | |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
8 | * and/or sell copies of the Software, and to permit persons to whom the | |
9 | * Software is furnished to do so, subject to the following conditions: | |
10 | * | |
11 | * The above copyright notice and this permission notice (including the next | |
12 | * paragraph) shall be included in all copies or substantial portions of the | |
13 | * Software. | |
14 | * | |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL | |
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING | |
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
21 | * IN THE SOFTWARE. | |
22 | */ | |
23 | ||
24 | /** | |
25 | * DOC: Shader validator for VC4. | |
26 | * | |
27 | * The VC4 has no IOMMU between it and system memory, so a user with | |
28 | * access to execute shaders could escalate privilege by overwriting | |
29 | * system memory (using the VPM write address register in the | |
30 | * general-purpose DMA mode) or reading system memory it shouldn't | |
31 | * (reading it as a texture, or uniform data, or vertex data). | |
32 | * | |
33 | * This walks over a shader BO, ensuring that its accesses are | |
34 | * appropriately bounded, and recording how many texture accesses are | |
35 | * made and where so that we can do relocations for them in the | |
36 | * uniform stream. | |
37 | */ | |
38 | ||
39 | #include "vc4_drv.h" | |
40 | #include "vc4_qpu_defines.h" | |
41 | ||
/* Per-shader walk state, tracking TMU setup progress and which
 * registers currently hold provably-clamped values.
 */
struct vc4_shader_validation_state {
	/* Parameter offsets accumulated for the next texture sample on
	 * each of the two TMUs, recorded into validated_shader when the
	 * sample is submitted (see record_texture_sample()).
	 */
	struct vc4_texture_sample_info tmu_setup[2];
	/* How many TMU setup register writes have been seen since the
	 * last sample submit, per TMU.
	 */
	int tmu_write_count[2];

	/* For registers that were last written to by a MIN instruction with
	 * one argument being a uniform, the address of the uniform.
	 * Otherwise, ~0.
	 *
	 * This is used for the validation of direct address memory reads.
	 */
	uint32_t live_min_clamp_offsets[32 + 32 + 4];
	/* True for registers last written by a MAX(x, 0) — the lower half
	 * of the clamp needed before a MIN against a uniform counts as a
	 * full clamp.  Index space is 32 A-file regs + 32 B-file regs +
	 * 4 accumulators (see waddr_to_live_reg_index()).
	 */
	bool live_max_clamp_regs[32 + 32 + 4];
};
55 | ||
56 | static uint32_t | |
57 | waddr_to_live_reg_index(uint32_t waddr, bool is_b) | |
58 | { | |
59 | if (waddr < 32) { | |
60 | if (is_b) | |
61 | return 32 + waddr; | |
62 | else | |
63 | return waddr; | |
64 | } else if (waddr <= QPU_W_ACC3) { | |
65 | return 64 + waddr - QPU_W_ACC0; | |
66 | } else { | |
67 | return ~0; | |
68 | } | |
69 | } | |
70 | ||
71 | static uint32_t | |
72 | raddr_add_a_to_live_reg_index(uint64_t inst) | |
73 | { | |
74 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); | |
75 | uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A); | |
76 | uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); | |
77 | uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); | |
78 | ||
79 | if (add_a == QPU_MUX_A) | |
80 | return raddr_a; | |
81 | else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) | |
82 | return 32 + raddr_b; | |
83 | else if (add_a <= QPU_MUX_R3) | |
84 | return 64 + add_a; | |
85 | else | |
86 | return ~0; | |
87 | } | |
88 | ||
89 | static bool | |
90 | is_tmu_submit(uint32_t waddr) | |
91 | { | |
92 | return (waddr == QPU_W_TMU0_S || | |
93 | waddr == QPU_W_TMU1_S); | |
94 | } | |
95 | ||
96 | static bool | |
97 | is_tmu_write(uint32_t waddr) | |
98 | { | |
99 | return (waddr >= QPU_W_TMU0_S && | |
100 | waddr <= QPU_W_TMU1_B); | |
101 | } | |
102 | ||
103 | static bool | |
104 | record_texture_sample(struct vc4_validated_shader_info *validated_shader, | |
105 | struct vc4_shader_validation_state *validation_state, | |
106 | int tmu) | |
107 | { | |
108 | uint32_t s = validated_shader->num_texture_samples; | |
109 | int i; | |
110 | struct vc4_texture_sample_info *temp_samples; | |
111 | ||
112 | temp_samples = krealloc(validated_shader->texture_samples, | |
113 | (s + 1) * sizeof(*temp_samples), | |
114 | GFP_KERNEL); | |
115 | if (!temp_samples) | |
116 | return false; | |
117 | ||
118 | memcpy(&temp_samples[s], | |
119 | &validation_state->tmu_setup[tmu], | |
120 | sizeof(*temp_samples)); | |
121 | ||
122 | validated_shader->num_texture_samples = s + 1; | |
123 | validated_shader->texture_samples = temp_samples; | |
124 | ||
125 | for (i = 0; i < 4; i++) | |
126 | validation_state->tmu_setup[tmu].p_offset[i] = ~0; | |
127 | ||
128 | return true; | |
129 | } | |
130 | ||
/* Validates a write to one of the TMU setup registers, accumulating the
 * offsets of the uniforms that will parameterize the sample.  When the
 * write is a submit with no prior setup writes, it is a "direct" memory
 * read, and the address computation must be a uniform (the UBO base)
 * plus a provably clamped offset so the TMU can't be pointed at
 * arbitrary memory.  Returns false to reject the shader.
 */
static bool
check_tmu_write(uint64_t inst,
		struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	/* TMU1's setup registers sit above TMU0's B register. */
	int tmu = waddr > QPU_W_TMU0_B;
	bool submit = is_tmu_submit(waddr);
	/* S written with no staged parameters => direct memory read. */
	bool is_direct = submit && validation_state->tmu_write_count[tmu] == 0;
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);

	if (is_direct) {
		uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
		uint32_t clamp_reg, clamp_offset;

		/* A small immediate would occupy the raddr_b field we
		 * rely on below for identifying the uniform operand.
		 */
		if (sig == QPU_SIG_SMALL_IMM) {
			DRM_ERROR("direct TMU read used small immediate\n");
			return false;
		}

		/* Make sure that this texture load is an add of the base
		 * address of the UBO to a clamped offset within the UBO.
		 */
		if (is_mul ||
		    QPU_GET_FIELD(inst, QPU_OP_ADD) != QPU_A_ADD) {
			DRM_ERROR("direct TMU load wasn't an add\n");
			return false;
		}

		/* We assert that the clamped address is the first
		 * argument, and the UBO base address is the second argument.
		 * This is arbitrary, but simpler than supporting flipping the
		 * two either way.
		 */
		clamp_reg = raddr_add_a_to_live_reg_index(inst);
		if (clamp_reg == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* The first argument must have been produced by a live
		 * MAX(x, 0) + MIN(x, uniform) pair (see track_live_clamps()).
		 */
		clamp_offset = validation_state->live_min_clamp_offsets[clamp_reg];
		if (clamp_offset == ~0) {
			DRM_ERROR("direct TMU load wasn't clamped\n");
			return false;
		}

		/* Store the clamp value's offset in p1 (see reloc_tex() in
		 * vc4_validate.c).
		 */
		validation_state->tmu_setup[tmu].p_offset[1] =
			clamp_offset;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("direct TMU load didn't add to a uniform\n");
			return false;
		}

		validation_state->tmu_setup[tmu].is_direct = true;
	} else {
		/* Texture setup uniforms are relocated by the kernel, so
		 * the same instruction must not also consume a uniform.
		 */
		if (raddr_a == QPU_R_UNIF || (sig != QPU_SIG_SMALL_IMM &&
					      raddr_b == QPU_R_UNIF)) {
			DRM_ERROR("uniform read in the same instruction as "
				  "texture setup.\n");
			return false;
		}
	}

	/* Only 4 parameter slots (p0-p3) exist per sample. */
	if (validation_state->tmu_write_count[tmu] >= 4) {
		DRM_ERROR("TMU%d got too many parameters before dispatch\n",
			  tmu);
		return false;
	}
	validation_state->tmu_setup[tmu].p_offset[validation_state->tmu_write_count[tmu]] =
		validated_shader->uniforms_size;
	validation_state->tmu_write_count[tmu]++;
	/* Since direct uses a RADDR uniform reference, it will get counted in
	 * check_instruction_reads()
	 */
	if (!is_direct)
		validated_shader->uniforms_size += 4;

	if (submit) {
		if (!record_texture_sample(validated_shader,
					   validation_state, tmu)) {
			return false;
		}

		validation_state->tmu_write_count[tmu] = 0;
	}

	return true;
}
229 | ||
/* Validates the destination of the instruction's ADD (or, with @is_mul,
 * MUL) operation.  Write addresses that could make the QPU touch system
 * memory are rejected or handed to check_tmu_write(); everything else
 * (including plain regfile writes, via the fallthrough return) is safe.
 */
static bool
check_reg_write(uint64_t inst,
		struct vc4_validated_shader_info *validated_shader,
		struct vc4_shader_validation_state *validation_state,
		bool is_mul)
{
	uint32_t waddr = (is_mul ?
			  QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
			  QPU_GET_FIELD(inst, QPU_WADDR_ADD));

	switch (waddr) {
	case QPU_W_UNIFORMS_ADDRESS:
		/* XXX: We'll probably need to support this for reladdr, but
		 * it's definitely a security-related one.
		 */
		DRM_ERROR("uniforms address load unsupported\n");
		return false;

	case QPU_W_TLB_COLOR_MS:
	case QPU_W_TLB_COLOR_ALL:
	case QPU_W_TLB_Z:
		/* These only interact with the tile buffer, not main memory,
		 * so they're safe.
		 */
		return true;

	case QPU_W_TMU0_S:
	case QPU_W_TMU0_T:
	case QPU_W_TMU0_R:
	case QPU_W_TMU0_B:
	case QPU_W_TMU1_S:
	case QPU_W_TMU1_T:
	case QPU_W_TMU1_R:
	case QPU_W_TMU1_B:
		/* TMU writes need full parameter tracking and clamping
		 * validation.
		 */
		return check_tmu_write(inst, validated_shader, validation_state,
				       is_mul);

	case QPU_W_HOST_INT:
	case QPU_W_TMU_NOSWAP:
	case QPU_W_TLB_ALPHA_MASK:
	case QPU_W_MUTEX_RELEASE:
		/* XXX: I haven't thought about these, so don't support them
		 * for now.
		 */
		DRM_ERROR("Unsupported waddr %d\n", waddr);
		return false;

	case QPU_W_VPM_ADDR:
		/* The VPM DMA address register is the privilege-escalation
		 * vector this validator exists to block.
		 */
		DRM_ERROR("General VPM DMA unsupported\n");
		return false;

	case QPU_W_VPM:
	case QPU_W_VPMVCD_SETUP:
		/* We allow VPM setup in general, even including VPM DMA
		 * configuration setup, because the (unsafe) DMA can only be
		 * triggered by QPU_W_VPM_ADDR writes.
		 */
		return true;

	case QPU_W_TLB_STENCIL_SETUP:
		return true;
	}

	/* Any other waddr (plain register file / accumulator writes) is
	 * safe.
	 */
	return true;
}
295 | ||
/* Updates the live-clamp tracking state after an instruction: clears the
 * state of any register the instruction overwrites, then, for an
 * always-executed ADD op, records MAX(x, 0) results and
 * MIN(max-clamped-x, uniform) results.  These are what check_tmu_write()
 * later accepts as a safely clamped direct-TMU offset.
 */
static void
track_live_clamps(uint64_t inst,
		  struct vc4_validated_shader_info *validated_shader,
		  struct vc4_shader_validation_state *validation_state)
{
	uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
	uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
	uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
	uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
	uint32_t add_a = QPU_GET_FIELD(inst, QPU_ADD_A);
	uint32_t add_b = QPU_GET_FIELD(inst, QPU_ADD_B);
	uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
	uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
	uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
	/* WS swaps which regfile the ADD and MUL results land in. */
	bool ws = inst & QPU_WS;
	uint32_t lri_add_a, lri_add, lri_mul;
	bool add_a_is_min_0;

	/* Check whether OP_ADD's A argument comes from a live MAX(x, 0),
	 * before we clear previous live state.
	 */
	lri_add_a = raddr_add_a_to_live_reg_index(inst);
	add_a_is_min_0 = (lri_add_a != ~0 &&
			  validation_state->live_max_clamp_regs[lri_add_a]);

	/* Clear live state for registers written by our instruction. */
	lri_add = waddr_to_live_reg_index(waddr_add, ws);
	lri_mul = waddr_to_live_reg_index(waddr_mul, !ws);
	if (lri_mul != ~0) {
		validation_state->live_max_clamp_regs[lri_mul] = false;
		validation_state->live_min_clamp_offsets[lri_mul] = ~0;
	}
	if (lri_add != ~0) {
		validation_state->live_max_clamp_regs[lri_add] = false;
		validation_state->live_min_clamp_offsets[lri_add] = ~0;
	} else {
		/* Nothing further to do for live tracking, since only ADDs
		 * generate new live clamp registers.
		 */
		return;
	}

	/* Now, handle remaining live clamp tracking for the ADD operation. */

	/* A conditional write might not happen, so it can't establish a
	 * clamp.
	 */
	if (cond_add != QPU_COND_ALWAYS)
		return;

	if (op_add == QPU_A_MAX) {
		/* Track live clamps of a value to a minimum of 0 (in either
		 * arg).  The zero must be a small immediate (raddr_b == 0
		 * under QPU_SIG_SMALL_IMM) read through MUX_B.
		 */
		if (sig != QPU_SIG_SMALL_IMM || raddr_b != 0 ||
		    (add_a != QPU_MUX_B && add_b != QPU_MUX_B)) {
			return;
		}

		validation_state->live_max_clamp_regs[lri_add] = true;
	} else if (op_add == QPU_A_MIN) {
		/* Track live clamps of a value clamped to a minimum of 0 and
		 * a maximum of some uniform's offset.
		 */
		if (!add_a_is_min_0)
			return;

		if (!(add_b == QPU_MUX_A && raddr_a == QPU_R_UNIF) &&
		    !(add_b == QPU_MUX_B && raddr_b == QPU_R_UNIF &&
		      sig != QPU_SIG_SMALL_IMM)) {
			return;
		}

		/* Remember which uniform bounds this register, for
		 * check_tmu_write() to stash in p1.
		 */
		validation_state->live_min_clamp_offsets[lri_add] =
			validated_shader->uniforms_size;
	}
}
370 | ||
371 | static bool | |
372 | check_instruction_writes(uint64_t inst, | |
373 | struct vc4_validated_shader_info *validated_shader, | |
374 | struct vc4_shader_validation_state *validation_state) | |
375 | { | |
376 | uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); | |
377 | uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); | |
378 | bool ok; | |
379 | ||
380 | if (is_tmu_write(waddr_add) && is_tmu_write(waddr_mul)) { | |
381 | DRM_ERROR("ADD and MUL both set up textures\n"); | |
382 | return false; | |
383 | } | |
384 | ||
385 | ok = (check_reg_write(inst, validated_shader, validation_state, | |
386 | false) && | |
387 | check_reg_write(inst, validated_shader, validation_state, | |
388 | true)); | |
389 | ||
390 | track_live_clamps(inst, validated_shader, validation_state); | |
391 | ||
392 | return ok; | |
393 | } | |
394 | ||
395 | static bool | |
396 | check_instruction_reads(uint64_t inst, | |
397 | struct vc4_validated_shader_info *validated_shader) | |
398 | { | |
399 | uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A); | |
400 | uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B); | |
401 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); | |
402 | ||
403 | if (raddr_a == QPU_R_UNIF || | |
404 | (raddr_b == QPU_R_UNIF && sig != QPU_SIG_SMALL_IMM)) { | |
405 | /* This can't overflow the uint32_t, because we're reading 8 | |
406 | * bytes of instruction to increment by 4 here, so we'd | |
407 | * already be OOM. | |
408 | */ | |
409 | validated_shader->uniforms_size += 4; | |
410 | } | |
411 | ||
412 | return true; | |
413 | } | |
414 | ||
415 | struct vc4_validated_shader_info * | |
416 | vc4_validate_shader(struct drm_gem_cma_object *shader_obj) | |
417 | { | |
418 | bool found_shader_end = false; | |
419 | int shader_end_ip = 0; | |
420 | uint32_t ip, max_ip; | |
421 | uint64_t *shader; | |
422 | struct vc4_validated_shader_info *validated_shader; | |
423 | struct vc4_shader_validation_state validation_state; | |
424 | int i; | |
425 | ||
426 | memset(&validation_state, 0, sizeof(validation_state)); | |
427 | ||
428 | for (i = 0; i < 8; i++) | |
429 | validation_state.tmu_setup[i / 4].p_offset[i % 4] = ~0; | |
430 | for (i = 0; i < ARRAY_SIZE(validation_state.live_min_clamp_offsets); i++) | |
431 | validation_state.live_min_clamp_offsets[i] = ~0; | |
432 | ||
433 | shader = shader_obj->vaddr; | |
434 | max_ip = shader_obj->base.size / sizeof(uint64_t); | |
435 | ||
436 | validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL); | |
437 | if (!validated_shader) | |
438 | return NULL; | |
439 | ||
440 | for (ip = 0; ip < max_ip; ip++) { | |
441 | uint64_t inst = shader[ip]; | |
442 | uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); | |
443 | ||
444 | switch (sig) { | |
445 | case QPU_SIG_NONE: | |
446 | case QPU_SIG_WAIT_FOR_SCOREBOARD: | |
447 | case QPU_SIG_SCOREBOARD_UNLOCK: | |
448 | case QPU_SIG_COLOR_LOAD: | |
449 | case QPU_SIG_LOAD_TMU0: | |
450 | case QPU_SIG_LOAD_TMU1: | |
451 | case QPU_SIG_PROG_END: | |
452 | case QPU_SIG_SMALL_IMM: | |
453 | if (!check_instruction_writes(inst, validated_shader, | |
454 | &validation_state)) { | |
455 | DRM_ERROR("Bad write at ip %d\n", ip); | |
456 | goto fail; | |
457 | } | |
458 | ||
459 | if (!check_instruction_reads(inst, validated_shader)) | |
460 | goto fail; | |
461 | ||
462 | if (sig == QPU_SIG_PROG_END) { | |
463 | found_shader_end = true; | |
464 | shader_end_ip = ip; | |
465 | } | |
466 | ||
467 | break; | |
468 | ||
469 | case QPU_SIG_LOAD_IMM: | |
470 | if (!check_instruction_writes(inst, validated_shader, | |
471 | &validation_state)) { | |
472 | DRM_ERROR("Bad LOAD_IMM write at ip %d\n", ip); | |
473 | goto fail; | |
474 | } | |
475 | break; | |
476 | ||
477 | default: | |
478 | DRM_ERROR("Unsupported QPU signal %d at " | |
479 | "instruction %d\n", sig, ip); | |
480 | goto fail; | |
481 | } | |
482 | ||
483 | /* There are two delay slots after program end is signaled | |
484 | * that are still executed, then we're finished. | |
485 | */ | |
486 | if (found_shader_end && ip == shader_end_ip + 2) | |
487 | break; | |
488 | } | |
489 | ||
490 | if (ip == max_ip) { | |
491 | DRM_ERROR("shader failed to terminate before " | |
492 | "shader BO end at %zd\n", | |
493 | shader_obj->base.size); | |
494 | goto fail; | |
495 | } | |
496 | ||
497 | /* Again, no chance of integer overflow here because the worst case | |
498 | * scenario is 8 bytes of uniforms plus handles per 8-byte | |
499 | * instruction. | |
500 | */ | |
501 | validated_shader->uniforms_src_size = | |
502 | (validated_shader->uniforms_size + | |
503 | 4 * validated_shader->num_texture_samples); | |
504 | ||
505 | return validated_shader; | |
506 | ||
507 | fail: | |
508 | if (validated_shader) { | |
509 | kfree(validated_shader->texture_samples); | |
510 | kfree(validated_shader); | |
511 | } | |
512 | return NULL; | |
513 | } |