/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
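
/*
 * Entry format for the RLC save/restore lists below (an assumption based
 * on the values themselves, not a statement from the hardware docs): the
 * low 16 bits hold the register offset in dwords (byte offset >> 2) and
 * the high 16 bits hold an instance selector, e.g. 0x0e00 for the
 * broadcast instance and 0x4e00..0xbe00 for individual shader-engine
 * instances.  A purely illustrative, hypothetical helper would be:
 *
 *	#define RLC_SRR_ENTRY(sel, reg)	(((sel) << 16) | ((reg) >> 2))
 *
 * so that (0x0e00 << 16) | (0xc12c >> 2) == RLC_SRR_ENTRY(0x0e00, 0xc12c).
 */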
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
135 (0x0e00 << 16) | (0xc140 >> 2),
137 (0x0e00 << 16) | (0xc150 >> 2),
139 (0x0e00 << 16) | (0xc15c >> 2),
141 (0x0e00 << 16) | (0xc168 >> 2),
143 (0x0e00 << 16) | (0xc170 >> 2),
145 (0x0e00 << 16) | (0xc178 >> 2),
147 (0x0e00 << 16) | (0xc204 >> 2),
149 (0x0e00 << 16) | (0xc2b4 >> 2),
151 (0x0e00 << 16) | (0xc2b8 >> 2),
153 (0x0e00 << 16) | (0xc2bc >> 2),
155 (0x0e00 << 16) | (0xc2c0 >> 2),
157 (0x0e00 << 16) | (0x8228 >> 2),
159 (0x0e00 << 16) | (0x829c >> 2),
161 (0x0e00 << 16) | (0x869c >> 2),
163 (0x0600 << 16) | (0x98f4 >> 2),
165 (0x0e00 << 16) | (0x98f8 >> 2),
167 (0x0e00 << 16) | (0x9900 >> 2),
169 (0x0e00 << 16) | (0xc260 >> 2),
171 (0x0e00 << 16) | (0x90e8 >> 2),
173 (0x0e00 << 16) | (0x3c000 >> 2),
175 (0x0e00 << 16) | (0x3c00c >> 2),
177 (0x0e00 << 16) | (0x8c1c >> 2),
179 (0x0e00 << 16) | (0x9700 >> 2),
181 (0x0e00 << 16) | (0xcd20 >> 2),
183 (0x4e00 << 16) | (0xcd20 >> 2),
185 (0x5e00 << 16) | (0xcd20 >> 2),
187 (0x6e00 << 16) | (0xcd20 >> 2),
189 (0x7e00 << 16) | (0xcd20 >> 2),
191 (0x8e00 << 16) | (0xcd20 >> 2),
193 (0x9e00 << 16) | (0xcd20 >> 2),
195 (0xae00 << 16) | (0xcd20 >> 2),
197 (0xbe00 << 16) | (0xcd20 >> 2),
199 (0x0e00 << 16) | (0x89bc >> 2),
201 (0x0e00 << 16) | (0x8900 >> 2),
204 (0x0e00 << 16) | (0xc130 >> 2),
206 (0x0e00 << 16) | (0xc134 >> 2),
208 (0x0e00 << 16) | (0xc1fc >> 2),
210 (0x0e00 << 16) | (0xc208 >> 2),
212 (0x0e00 << 16) | (0xc264 >> 2),
214 (0x0e00 << 16) | (0xc268 >> 2),
216 (0x0e00 << 16) | (0xc26c >> 2),
218 (0x0e00 << 16) | (0xc270 >> 2),
220 (0x0e00 << 16) | (0xc274 >> 2),
222 (0x0e00 << 16) | (0xc278 >> 2),
224 (0x0e00 << 16) | (0xc27c >> 2),
226 (0x0e00 << 16) | (0xc280 >> 2),
228 (0x0e00 << 16) | (0xc284 >> 2),
230 (0x0e00 << 16) | (0xc288 >> 2),
232 (0x0e00 << 16) | (0xc28c >> 2),
234 (0x0e00 << 16) | (0xc290 >> 2),
236 (0x0e00 << 16) | (0xc294 >> 2),
238 (0x0e00 << 16) | (0xc298 >> 2),
240 (0x0e00 << 16) | (0xc29c >> 2),
242 (0x0e00 << 16) | (0xc2a0 >> 2),
244 (0x0e00 << 16) | (0xc2a4 >> 2),
246 (0x0e00 << 16) | (0xc2a8 >> 2),
248 (0x0e00 << 16) | (0xc2ac >> 2),
250 (0x0e00 << 16) | (0xc2b0 >> 2),
252 (0x0e00 << 16) | (0x301d0 >> 2),
254 (0x0e00 << 16) | (0x30238 >> 2),
256 (0x0e00 << 16) | (0x30250 >> 2),
258 (0x0e00 << 16) | (0x30254 >> 2),
260 (0x0e00 << 16) | (0x30258 >> 2),
262 (0x0e00 << 16) | (0x3025c >> 2),
264 (0x4e00 << 16) | (0xc900 >> 2),
266 (0x5e00 << 16) | (0xc900 >> 2),
268 (0x6e00 << 16) | (0xc900 >> 2),
270 (0x7e00 << 16) | (0xc900 >> 2),
272 (0x8e00 << 16) | (0xc900 >> 2),
274 (0x9e00 << 16) | (0xc900 >> 2),
276 (0xae00 << 16) | (0xc900 >> 2),
278 (0xbe00 << 16) | (0xc900 >> 2),
280 (0x4e00 << 16) | (0xc904 >> 2),
282 (0x5e00 << 16) | (0xc904 >> 2),
284 (0x6e00 << 16) | (0xc904 >> 2),
286 (0x7e00 << 16) | (0xc904 >> 2),
288 (0x8e00 << 16) | (0xc904 >> 2),
290 (0x9e00 << 16) | (0xc904 >> 2),
292 (0xae00 << 16) | (0xc904 >> 2),
294 (0xbe00 << 16) | (0xc904 >> 2),
296 (0x4e00 << 16) | (0xc908 >> 2),
298 (0x5e00 << 16) | (0xc908 >> 2),
300 (0x6e00 << 16) | (0xc908 >> 2),
302 (0x7e00 << 16) | (0xc908 >> 2),
304 (0x8e00 << 16) | (0xc908 >> 2),
306 (0x9e00 << 16) | (0xc908 >> 2),
308 (0xae00 << 16) | (0xc908 >> 2),
310 (0xbe00 << 16) | (0xc908 >> 2),
312 (0x4e00 << 16) | (0xc90c >> 2),
314 (0x5e00 << 16) | (0xc90c >> 2),
316 (0x6e00 << 16) | (0xc90c >> 2),
318 (0x7e00 << 16) | (0xc90c >> 2),
320 (0x8e00 << 16) | (0xc90c >> 2),
322 (0x9e00 << 16) | (0xc90c >> 2),
324 (0xae00 << 16) | (0xc90c >> 2),
326 (0xbe00 << 16) | (0xc90c >> 2),
328 (0x4e00 << 16) | (0xc910 >> 2),
330 (0x5e00 << 16) | (0xc910 >> 2),
332 (0x6e00 << 16) | (0xc910 >> 2),
334 (0x7e00 << 16) | (0xc910 >> 2),
336 (0x8e00 << 16) | (0xc910 >> 2),
338 (0x9e00 << 16) | (0xc910 >> 2),
340 (0xae00 << 16) | (0xc910 >> 2),
342 (0xbe00 << 16) | (0xc910 >> 2),
344 (0x0e00 << 16) | (0xc99c >> 2),
346 (0x0e00 << 16) | (0x9834 >> 2),
348 (0x0000 << 16) | (0x30f00 >> 2),
350 (0x0001 << 16) | (0x30f00 >> 2),
352 (0x0000 << 16) | (0x30f04 >> 2),
354 (0x0001 << 16) | (0x30f04 >> 2),
356 (0x0000 << 16) | (0x30f08 >> 2),
358 (0x0001 << 16) | (0x30f08 >> 2),
360 (0x0000 << 16) | (0x30f0c >> 2),
362 (0x0001 << 16) | (0x30f0c >> 2),
364 (0x0600 << 16) | (0x9b7c >> 2),
366 (0x0e00 << 16) | (0x8a14 >> 2),
368 (0x0e00 << 16) | (0x8a18 >> 2),
370 (0x0600 << 16) | (0x30a00 >> 2),
372 (0x0e00 << 16) | (0x8bf0 >> 2),
374 (0x0e00 << 16) | (0x8bcc >> 2),
376 (0x0e00 << 16) | (0x8b24 >> 2),
378 (0x0e00 << 16) | (0x30a04 >> 2),
380 (0x0600 << 16) | (0x30a10 >> 2),
382 (0x0600 << 16) | (0x30a14 >> 2),
384 (0x0600 << 16) | (0x30a18 >> 2),
386 (0x0600 << 16) | (0x30a2c >> 2),
388 (0x0e00 << 16) | (0xc700 >> 2),
390 (0x0e00 << 16) | (0xc704 >> 2),
392 (0x0e00 << 16) | (0xc708 >> 2),
394 (0x0e00 << 16) | (0xc768 >> 2),
396 (0x0400 << 16) | (0xc770 >> 2),
398 (0x0400 << 16) | (0xc774 >> 2),
400 (0x0400 << 16) | (0xc778 >> 2),
402 (0x0400 << 16) | (0xc77c >> 2),
404 (0x0400 << 16) | (0xc780 >> 2),
406 (0x0400 << 16) | (0xc784 >> 2),
408 (0x0400 << 16) | (0xc788 >> 2),
410 (0x0400 << 16) | (0xc78c >> 2),
412 (0x0400 << 16) | (0xc798 >> 2),
414 (0x0400 << 16) | (0xc79c >> 2),
416 (0x0400 << 16) | (0xc7a0 >> 2),
418 (0x0400 << 16) | (0xc7a4 >> 2),
420 (0x0400 << 16) | (0xc7a8 >> 2),
422 (0x0400 << 16) | (0xc7ac >> 2),
424 (0x0400 << 16) | (0xc7b0 >> 2),
426 (0x0400 << 16) | (0xc7b4 >> 2),
428 (0x0e00 << 16) | (0x9100 >> 2),
430 (0x0e00 << 16) | (0x3c010 >> 2),
432 (0x0e00 << 16) | (0x92a8 >> 2),
434 (0x0e00 << 16) | (0x92ac >> 2),
436 (0x0e00 << 16) | (0x92b4 >> 2),
438 (0x0e00 << 16) | (0x92b8 >> 2),
440 (0x0e00 << 16) | (0x92bc >> 2),
442 (0x0e00 << 16) | (0x92c0 >> 2),
444 (0x0e00 << 16) | (0x92c4 >> 2),
446 (0x0e00 << 16) | (0x92c8 >> 2),
448 (0x0e00 << 16) | (0x92cc >> 2),
450 (0x0e00 << 16) | (0x92d0 >> 2),
452 (0x0e00 << 16) | (0x8c00 >> 2),
454 (0x0e00 << 16) | (0x8c04 >> 2),
456 (0x0e00 << 16) | (0x8c20 >> 2),
458 (0x0e00 << 16) | (0x8c38 >> 2),
460 (0x0e00 << 16) | (0x8c3c >> 2),
462 (0x0e00 << 16) | (0xae00 >> 2),
464 (0x0e00 << 16) | (0x9604 >> 2),
466 (0x0e00 << 16) | (0xac08 >> 2),
468 (0x0e00 << 16) | (0xac0c >> 2),
470 (0x0e00 << 16) | (0xac10 >> 2),
472 (0x0e00 << 16) | (0xac14 >> 2),
474 (0x0e00 << 16) | (0xac58 >> 2),
476 (0x0e00 << 16) | (0xac68 >> 2),
478 (0x0e00 << 16) | (0xac6c >> 2),
480 (0x0e00 << 16) | (0xac70 >> 2),
482 (0x0e00 << 16) | (0xac74 >> 2),
484 (0x0e00 << 16) | (0xac78 >> 2),
486 (0x0e00 << 16) | (0xac7c >> 2),
488 (0x0e00 << 16) | (0xac80 >> 2),
490 (0x0e00 << 16) | (0xac84 >> 2),
492 (0x0e00 << 16) | (0xac88 >> 2),
494 (0x0e00 << 16) | (0xac8c >> 2),
496 (0x0e00 << 16) | (0x970c >> 2),
498 (0x0e00 << 16) | (0x9714 >> 2),
500 (0x0e00 << 16) | (0x9718 >> 2),
502 (0x0e00 << 16) | (0x971c >> 2),
504 (0x0e00 << 16) | (0x31068 >> 2),
506 (0x4e00 << 16) | (0x31068 >> 2),
508 (0x5e00 << 16) | (0x31068 >> 2),
510 (0x6e00 << 16) | (0x31068 >> 2),
512 (0x7e00 << 16) | (0x31068 >> 2),
514 (0x8e00 << 16) | (0x31068 >> 2),
516 (0x9e00 << 16) | (0x31068 >> 2),
518 (0xae00 << 16) | (0x31068 >> 2),
520 (0xbe00 << 16) | (0x31068 >> 2),
522 (0x0e00 << 16) | (0xcd10 >> 2),
524 (0x0e00 << 16) | (0xcd14 >> 2),
526 (0x0e00 << 16) | (0x88b0 >> 2),
528 (0x0e00 << 16) | (0x88b4 >> 2),
530 (0x0e00 << 16) | (0x88b8 >> 2),
532 (0x0e00 << 16) | (0x88bc >> 2),
534 (0x0400 << 16) | (0x89c0 >> 2),
536 (0x0e00 << 16) | (0x88c4 >> 2),
538 (0x0e00 << 16) | (0x88c8 >> 2),
540 (0x0e00 << 16) | (0x88d0 >> 2),
542 (0x0e00 << 16) | (0x88d4 >> 2),
544 (0x0e00 << 16) | (0x88d8 >> 2),
546 (0x0e00 << 16) | (0x8980 >> 2),
548 (0x0e00 << 16) | (0x30938 >> 2),
550 (0x0e00 << 16) | (0x3093c >> 2),
552 (0x0e00 << 16) | (0x30940 >> 2),
554 (0x0e00 << 16) | (0x89a0 >> 2),
556 (0x0e00 << 16) | (0x30900 >> 2),
558 (0x0e00 << 16) | (0x30904 >> 2),
560 (0x0e00 << 16) | (0x89b4 >> 2),
562 (0x0e00 << 16) | (0x3c210 >> 2),
564 (0x0e00 << 16) | (0x3c214 >> 2),
566 (0x0e00 << 16) | (0x3c218 >> 2),
568 (0x0e00 << 16) | (0x8904 >> 2),
571 (0x0e00 << 16) | (0x8c28 >> 2),
572 (0x0e00 << 16) | (0x8c2c >> 2),
573 (0x0e00 << 16) | (0x8c30 >> 2),
574 (0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
582 (0x0e00 << 16) | (0xc140 >> 2),
584 (0x0e00 << 16) | (0xc150 >> 2),
586 (0x0e00 << 16) | (0xc15c >> 2),
588 (0x0e00 << 16) | (0xc168 >> 2),
590 (0x0e00 << 16) | (0xc170 >> 2),
592 (0x0e00 << 16) | (0xc204 >> 2),
594 (0x0e00 << 16) | (0xc2b4 >> 2),
596 (0x0e00 << 16) | (0xc2b8 >> 2),
598 (0x0e00 << 16) | (0xc2bc >> 2),
600 (0x0e00 << 16) | (0xc2c0 >> 2),
602 (0x0e00 << 16) | (0x8228 >> 2),
604 (0x0e00 << 16) | (0x829c >> 2),
606 (0x0e00 << 16) | (0x869c >> 2),
608 (0x0600 << 16) | (0x98f4 >> 2),
610 (0x0e00 << 16) | (0x98f8 >> 2),
612 (0x0e00 << 16) | (0x9900 >> 2),
614 (0x0e00 << 16) | (0xc260 >> 2),
616 (0x0e00 << 16) | (0x90e8 >> 2),
618 (0x0e00 << 16) | (0x3c000 >> 2),
620 (0x0e00 << 16) | (0x3c00c >> 2),
622 (0x0e00 << 16) | (0x8c1c >> 2),
624 (0x0e00 << 16) | (0x9700 >> 2),
626 (0x0e00 << 16) | (0xcd20 >> 2),
628 (0x4e00 << 16) | (0xcd20 >> 2),
630 (0x5e00 << 16) | (0xcd20 >> 2),
632 (0x6e00 << 16) | (0xcd20 >> 2),
634 (0x7e00 << 16) | (0xcd20 >> 2),
636 (0x0e00 << 16) | (0x89bc >> 2),
638 (0x0e00 << 16) | (0x8900 >> 2),
641 (0x0e00 << 16) | (0xc130 >> 2),
643 (0x0e00 << 16) | (0xc134 >> 2),
645 (0x0e00 << 16) | (0xc1fc >> 2),
647 (0x0e00 << 16) | (0xc208 >> 2),
649 (0x0e00 << 16) | (0xc264 >> 2),
651 (0x0e00 << 16) | (0xc268 >> 2),
653 (0x0e00 << 16) | (0xc26c >> 2),
655 (0x0e00 << 16) | (0xc270 >> 2),
657 (0x0e00 << 16) | (0xc274 >> 2),
659 (0x0e00 << 16) | (0xc28c >> 2),
661 (0x0e00 << 16) | (0xc290 >> 2),
663 (0x0e00 << 16) | (0xc294 >> 2),
665 (0x0e00 << 16) | (0xc298 >> 2),
667 (0x0e00 << 16) | (0xc2a0 >> 2),
669 (0x0e00 << 16) | (0xc2a4 >> 2),
671 (0x0e00 << 16) | (0xc2a8 >> 2),
673 (0x0e00 << 16) | (0xc2ac >> 2),
675 (0x0e00 << 16) | (0x301d0 >> 2),
677 (0x0e00 << 16) | (0x30238 >> 2),
679 (0x0e00 << 16) | (0x30250 >> 2),
681 (0x0e00 << 16) | (0x30254 >> 2),
683 (0x0e00 << 16) | (0x30258 >> 2),
685 (0x0e00 << 16) | (0x3025c >> 2),
687 (0x4e00 << 16) | (0xc900 >> 2),
689 (0x5e00 << 16) | (0xc900 >> 2),
691 (0x6e00 << 16) | (0xc900 >> 2),
693 (0x7e00 << 16) | (0xc900 >> 2),
695 (0x4e00 << 16) | (0xc904 >> 2),
697 (0x5e00 << 16) | (0xc904 >> 2),
699 (0x6e00 << 16) | (0xc904 >> 2),
701 (0x7e00 << 16) | (0xc904 >> 2),
703 (0x4e00 << 16) | (0xc908 >> 2),
705 (0x5e00 << 16) | (0xc908 >> 2),
707 (0x6e00 << 16) | (0xc908 >> 2),
709 (0x7e00 << 16) | (0xc908 >> 2),
711 (0x4e00 << 16) | (0xc90c >> 2),
713 (0x5e00 << 16) | (0xc90c >> 2),
715 (0x6e00 << 16) | (0xc90c >> 2),
717 (0x7e00 << 16) | (0xc90c >> 2),
719 (0x4e00 << 16) | (0xc910 >> 2),
721 (0x5e00 << 16) | (0xc910 >> 2),
723 (0x6e00 << 16) | (0xc910 >> 2),
725 (0x7e00 << 16) | (0xc910 >> 2),
727 (0x0e00 << 16) | (0xc99c >> 2),
729 (0x0e00 << 16) | (0x9834 >> 2),
731 (0x0000 << 16) | (0x30f00 >> 2),
733 (0x0000 << 16) | (0x30f04 >> 2),
735 (0x0000 << 16) | (0x30f08 >> 2),
737 (0x0000 << 16) | (0x30f0c >> 2),
739 (0x0600 << 16) | (0x9b7c >> 2),
741 (0x0e00 << 16) | (0x8a14 >> 2),
743 (0x0e00 << 16) | (0x8a18 >> 2),
745 (0x0600 << 16) | (0x30a00 >> 2),
747 (0x0e00 << 16) | (0x8bf0 >> 2),
749 (0x0e00 << 16) | (0x8bcc >> 2),
751 (0x0e00 << 16) | (0x8b24 >> 2),
753 (0x0e00 << 16) | (0x30a04 >> 2),
755 (0x0600 << 16) | (0x30a10 >> 2),
757 (0x0600 << 16) | (0x30a14 >> 2),
759 (0x0600 << 16) | (0x30a18 >> 2),
761 (0x0600 << 16) | (0x30a2c >> 2),
763 (0x0e00 << 16) | (0xc700 >> 2),
765 (0x0e00 << 16) | (0xc704 >> 2),
767 (0x0e00 << 16) | (0xc708 >> 2),
769 (0x0e00 << 16) | (0xc768 >> 2),
771 (0x0400 << 16) | (0xc770 >> 2),
773 (0x0400 << 16) | (0xc774 >> 2),
775 (0x0400 << 16) | (0xc798 >> 2),
777 (0x0400 << 16) | (0xc79c >> 2),
779 (0x0e00 << 16) | (0x9100 >> 2),
781 (0x0e00 << 16) | (0x3c010 >> 2),
783 (0x0e00 << 16) | (0x8c00 >> 2),
785 (0x0e00 << 16) | (0x8c04 >> 2),
787 (0x0e00 << 16) | (0x8c20 >> 2),
789 (0x0e00 << 16) | (0x8c38 >> 2),
791 (0x0e00 << 16) | (0x8c3c >> 2),
793 (0x0e00 << 16) | (0xae00 >> 2),
795 (0x0e00 << 16) | (0x9604 >> 2),
797 (0x0e00 << 16) | (0xac08 >> 2),
799 (0x0e00 << 16) | (0xac0c >> 2),
801 (0x0e00 << 16) | (0xac10 >> 2),
803 (0x0e00 << 16) | (0xac14 >> 2),
805 (0x0e00 << 16) | (0xac58 >> 2),
807 (0x0e00 << 16) | (0xac68 >> 2),
809 (0x0e00 << 16) | (0xac6c >> 2),
811 (0x0e00 << 16) | (0xac70 >> 2),
813 (0x0e00 << 16) | (0xac74 >> 2),
815 (0x0e00 << 16) | (0xac78 >> 2),
817 (0x0e00 << 16) | (0xac7c >> 2),
819 (0x0e00 << 16) | (0xac80 >> 2),
821 (0x0e00 << 16) | (0xac84 >> 2),
823 (0x0e00 << 16) | (0xac88 >> 2),
825 (0x0e00 << 16) | (0xac8c >> 2),
827 (0x0e00 << 16) | (0x970c >> 2),
829 (0x0e00 << 16) | (0x9714 >> 2),
831 (0x0e00 << 16) | (0x9718 >> 2),
833 (0x0e00 << 16) | (0x971c >> 2),
835 (0x0e00 << 16) | (0x31068 >> 2),
837 (0x4e00 << 16) | (0x31068 >> 2),
839 (0x5e00 << 16) | (0x31068 >> 2),
841 (0x6e00 << 16) | (0x31068 >> 2),
843 (0x7e00 << 16) | (0x31068 >> 2),
845 (0x0e00 << 16) | (0xcd10 >> 2),
847 (0x0e00 << 16) | (0xcd14 >> 2),
849 (0x0e00 << 16) | (0x88b0 >> 2),
851 (0x0e00 << 16) | (0x88b4 >> 2),
853 (0x0e00 << 16) | (0x88b8 >> 2),
855 (0x0e00 << 16) | (0x88bc >> 2),
857 (0x0400 << 16) | (0x89c0 >> 2),
859 (0x0e00 << 16) | (0x88c4 >> 2),
861 (0x0e00 << 16) | (0x88c8 >> 2),
863 (0x0e00 << 16) | (0x88d0 >> 2),
865 (0x0e00 << 16) | (0x88d4 >> 2),
867 (0x0e00 << 16) | (0x88d8 >> 2),
869 (0x0e00 << 16) | (0x8980 >> 2),
871 (0x0e00 << 16) | (0x30938 >> 2),
873 (0x0e00 << 16) | (0x3093c >> 2),
875 (0x0e00 << 16) | (0x30940 >> 2),
877 (0x0e00 << 16) | (0x89a0 >> 2),
879 (0x0e00 << 16) | (0x30900 >> 2),
881 (0x0e00 << 16) | (0x30904 >> 2),
883 (0x0e00 << 16) | (0x89b4 >> 2),
885 (0x0e00 << 16) | (0x3e1fc >> 2),
887 (0x0e00 << 16) | (0x3c210 >> 2),
889 (0x0e00 << 16) | (0x3c214 >> 2),
891 (0x0e00 << 16) | (0x3c218 >> 2),
893 (0x0e00 << 16) | (0x8904 >> 2),
896 (0x0e00 << 16) | (0x8c28 >> 2),
897 (0x0e00 << 16) | (0x8c2c >> 2),
898 (0x0e00 << 16) | (0x8c30 >> 2),
899 (0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
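
/*
 * The golden register tables below are {offset, and-mask, value} triplets
 * consumed by radeon_program_register_sequence() in
 * cik_init_golden_registers().  A minimal sketch of how one triplet is
 * applied, assuming the usual read-modify-write semantics (the
 * authoritative implementation lives in radeon_device.c, so treat this as
 * an illustration rather than the driver's code):
 *
 *	tmp = RREG32(reg);
 *	tmp &= ~and_mask;
 *	tmp |= or_value;
 *	WREG32(reg, tmp);
 *
 * with an and-mask of 0xffffffff treated as a straight overwrite.
 */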
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
919 0x3350, 0x000c0fc0, 0x00040200,
920 0x9a10, 0x00010000, 0x00058208,
921 0x3c000, 0xffff1fff, 0x00140000,
922 0x3c200, 0xfdfc0fff, 0x00000100,
923 0x3c234, 0x40000000, 0x40000200,
924 0x9830, 0xffffffff, 0x00000000,
925 0x9834, 0xf00fffff, 0x00000400,
926 0x9838, 0x0002021c, 0x00020200,
927 0xc78, 0x00000080, 0x00000000,
928 0x5bb0, 0x000000f0, 0x00000070,
929 0x5bc0, 0xf0311fff, 0x80300000,
930 0x98f8, 0x73773777, 0x12010001,
931 0x350c, 0x00810000, 0x408af000,
932 0x7030, 0x31000111, 0x00000011,
933 0x2f48, 0x73773777, 0x12010001,
934 0x220c, 0x00007fb6, 0x0021a1b1,
935 0x2210, 0x00007fb6, 0x002021b1,
936 0x2180, 0x00007fb6, 0x00002191,
937 0x2218, 0x00007fb6, 0x002121b1,
938 0x221c, 0x00007fb6, 0x002021b1,
939 0x21dc, 0x00007fb6, 0x00002191,
940 0x21e0, 0x00007fb6, 0x00002191,
941 0x3628, 0x0000003f, 0x0000000a,
942 0x362c, 0x0000003f, 0x0000000a,
943 0x2ae4, 0x00073ffe, 0x000022a2,
944 0x240c, 0x000007ff, 0x00000000,
945 0x8a14, 0xf000003f, 0x00000007,
946 0x8bf0, 0x00002001, 0x00000001,
947 0x8b24, 0xffffffff, 0x00ffffff,
948 0x30a04, 0x0000ff0f, 0x00000000,
949 0x28a4c, 0x07ffffff, 0x06000000,
950 0x4d8, 0x00000fff, 0x00000100,
951 0x3e78, 0x00000001, 0x00000002,
952 0x9100, 0x03000000, 0x0362c688,
953 0x8c00, 0x000000ff, 0x00000001,
954 0xe40, 0x00001fff, 0x00001fff,
955 0x9060, 0x0000007f, 0x00000020,
956 0x9508, 0x00010000, 0x00010000,
957 0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
964 0x30800, 0xffffffff, 0xe0000000,
965 0x3c2a0, 0xffffffff, 0x00000100,
966 0x3c208, 0xffffffff, 0x00000100,
967 0x3c2c0, 0xffffffff, 0xc0000100,
968 0x3c2c8, 0xffffffff, 0xc0000100,
969 0x3c2c4, 0xffffffff, 0xc0000100,
970 0x55e4, 0xffffffff, 0x00600100,
971 0x3c280, 0xffffffff, 0x00000100,
972 0x3c214, 0xffffffff, 0x06000100,
973 0x3c220, 0xffffffff, 0x00000100,
974 0x3c218, 0xffffffff, 0x06000100,
975 0x3c204, 0xffffffff, 0x00000100,
976 0x3c2e0, 0xffffffff, 0x00000100,
977 0x3c224, 0xffffffff, 0x00000100,
978 0x3c200, 0xffffffff, 0x00000100,
979 0x3c230, 0xffffffff, 0x00000100,
980 0x3c234, 0xffffffff, 0x00000100,
981 0x3c250, 0xffffffff, 0x00000100,
982 0x3c254, 0xffffffff, 0x00000100,
983 0x3c258, 0xffffffff, 0x00000100,
984 0x3c25c, 0xffffffff, 0x00000100,
985 0x3c260, 0xffffffff, 0x00000100,
986 0x3c27c, 0xffffffff, 0x00000100,
987 0x3c278, 0xffffffff, 0x00000100,
988 0x3c210, 0xffffffff, 0x06000100,
989 0x3c290, 0xffffffff, 0x00000100,
990 0x3c274, 0xffffffff, 0x00000100,
991 0x3c2b4, 0xffffffff, 0x00000100,
992 0x3c2b0, 0xffffffff, 0x00000100,
993 0x3c270, 0xffffffff, 0x00000100,
994 0x30800, 0xffffffff, 0xe0000000,
995 0x3c020, 0xffffffff, 0x00010000,
996 0x3c024, 0xffffffff, 0x00030002,
997 0x3c028, 0xffffffff, 0x00040007,
998 0x3c02c, 0xffffffff, 0x00060005,
999 0x3c030, 0xffffffff, 0x00090008,
1000 0x3c034, 0xffffffff, 0x00010000,
1001 0x3c038, 0xffffffff, 0x00030002,
1002 0x3c03c, 0xffffffff, 0x00040007,
1003 0x3c040, 0xffffffff, 0x00060005,
1004 0x3c044, 0xffffffff, 0x00090008,
1005 0x3c048, 0xffffffff, 0x00010000,
1006 0x3c04c, 0xffffffff, 0x00030002,
1007 0x3c050, 0xffffffff, 0x00040007,
1008 0x3c054, 0xffffffff, 0x00060005,
1009 0x3c058, 0xffffffff, 0x00090008,
1010 0x3c05c, 0xffffffff, 0x00010000,
1011 0x3c060, 0xffffffff, 0x00030002,
1012 0x3c064, 0xffffffff, 0x00040007,
1013 0x3c068, 0xffffffff, 0x00060005,
1014 0x3c06c, 0xffffffff, 0x00090008,
1015 0x3c070, 0xffffffff, 0x00010000,
1016 0x3c074, 0xffffffff, 0x00030002,
1017 0x3c078, 0xffffffff, 0x00040007,
1018 0x3c07c, 0xffffffff, 0x00060005,
1019 0x3c080, 0xffffffff, 0x00090008,
1020 0x3c084, 0xffffffff, 0x00010000,
1021 0x3c088, 0xffffffff, 0x00030002,
1022 0x3c08c, 0xffffffff, 0x00040007,
1023 0x3c090, 0xffffffff, 0x00060005,
1024 0x3c094, 0xffffffff, 0x00090008,
1025 0x3c098, 0xffffffff, 0x00010000,
1026 0x3c09c, 0xffffffff, 0x00030002,
1027 0x3c0a0, 0xffffffff, 0x00040007,
1028 0x3c0a4, 0xffffffff, 0x00060005,
1029 0x3c0a8, 0xffffffff, 0x00090008,
1030 0x3c000, 0xffffffff, 0x96e00200,
1031 0x8708, 0xffffffff, 0x00900100,
1032 0xc424, 0xffffffff, 0x0020003f,
1033 0x38, 0xffffffff, 0x0140001c,
1034 0x3c, 0x000f0000, 0x000f0000,
1035 0x220, 0xffffffff, 0xC060000C,
1036 0x224, 0xc0000fff, 0x00000100,
1037 0xf90, 0xffffffff, 0x00000100,
1038 0xf98, 0x00000101, 0x00000000,
1039 0x20a8, 0xffffffff, 0x00000104,
1040 0x55e4, 0xff000fff, 0x00000100,
1041 0x30cc, 0xc0000fff, 0x00000104,
1042 0xc1e4, 0x00000001, 0x00000001,
1043 0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
1063 0x3c00c, 0xffff0001, 0xff000000,
1064 0x3c200, 0xfffc0fff, 0x00000100,
1065 0x6ed8, 0x00010101, 0x00010000,
1066 0x9834, 0xf00fffff, 0x00000400,
1067 0x9838, 0xfffffffc, 0x00020200,
1068 0x5bb0, 0x000000f0, 0x00000070,
1069 0x5bc0, 0xf0311fff, 0x80300000,
1070 0x98f8, 0x73773777, 0x12010001,
1071 0x9b7c, 0x00ff0000, 0x00fc0000,
1072 0x2f48, 0x73773777, 0x12010001,
1073 0x8a14, 0xf000003f, 0x00000007,
1074 0x8b24, 0xffffffff, 0x00ffffff,
1075 0x28350, 0x3f3f3fff, 0x00000082,
1076 0x28355, 0x0000003f, 0x00000000,
1077 0x3e78, 0x00000001, 0x00000002,
1078 0x913c, 0xffff03df, 0x00000004,
1079 0xc768, 0x00000008, 0x00000008,
1080 0x8c00, 0x000008ff, 0x00000800,
1081 0x9508, 0x00010000, 0x00010000,
1082 0xac0c, 0xffffffff, 0x54763210,
1083 0x214f8, 0x01ff01ff, 0x00000002,
1084 0x21498, 0x007ff800, 0x00200000,
1085 0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
1092 0x30800, 0xffffffff, 0xe0000000,
1093 0x3c2a0, 0xffffffff, 0x00000100,
1094 0x3c208, 0xffffffff, 0x00000100,
1095 0x3c2c0, 0xffffffff, 0x00000100,
1096 0x3c2c8, 0xffffffff, 0x00000100,
1097 0x3c2c4, 0xffffffff, 0x00000100,
1098 0x55e4, 0xffffffff, 0x00600100,
1099 0x3c280, 0xffffffff, 0x00000100,
1100 0x3c214, 0xffffffff, 0x06000100,
1101 0x3c220, 0xffffffff, 0x00000100,
1102 0x3c218, 0xffffffff, 0x06000100,
1103 0x3c204, 0xffffffff, 0x00000100,
1104 0x3c2e0, 0xffffffff, 0x00000100,
1105 0x3c224, 0xffffffff, 0x00000100,
1106 0x3c200, 0xffffffff, 0x00000100,
1107 0x3c230, 0xffffffff, 0x00000100,
1108 0x3c234, 0xffffffff, 0x00000100,
1109 0x3c250, 0xffffffff, 0x00000100,
1110 0x3c254, 0xffffffff, 0x00000100,
1111 0x3c258, 0xffffffff, 0x00000100,
1112 0x3c25c, 0xffffffff, 0x00000100,
1113 0x3c260, 0xffffffff, 0x00000100,
1114 0x3c27c, 0xffffffff, 0x00000100,
1115 0x3c278, 0xffffffff, 0x00000100,
1116 0x3c210, 0xffffffff, 0x06000100,
1117 0x3c290, 0xffffffff, 0x00000100,
1118 0x3c274, 0xffffffff, 0x00000100,
1119 0x3c2b4, 0xffffffff, 0x00000100,
1120 0x3c2b0, 0xffffffff, 0x00000100,
1121 0x3c270, 0xffffffff, 0x00000100,
1122 0x30800, 0xffffffff, 0xe0000000,
1123 0x3c020, 0xffffffff, 0x00010000,
1124 0x3c024, 0xffffffff, 0x00030002,
1125 0x3c028, 0xffffffff, 0x00040007,
1126 0x3c02c, 0xffffffff, 0x00060005,
1127 0x3c030, 0xffffffff, 0x00090008,
1128 0x3c034, 0xffffffff, 0x00010000,
1129 0x3c038, 0xffffffff, 0x00030002,
1130 0x3c03c, 0xffffffff, 0x00040007,
1131 0x3c040, 0xffffffff, 0x00060005,
1132 0x3c044, 0xffffffff, 0x00090008,
1133 0x3c048, 0xffffffff, 0x00010000,
1134 0x3c04c, 0xffffffff, 0x00030002,
1135 0x3c050, 0xffffffff, 0x00040007,
1136 0x3c054, 0xffffffff, 0x00060005,
1137 0x3c058, 0xffffffff, 0x00090008,
1138 0x3c05c, 0xffffffff, 0x00010000,
1139 0x3c060, 0xffffffff, 0x00030002,
1140 0x3c064, 0xffffffff, 0x00040007,
1141 0x3c068, 0xffffffff, 0x00060005,
1142 0x3c06c, 0xffffffff, 0x00090008,
1143 0x3c070, 0xffffffff, 0x00010000,
1144 0x3c074, 0xffffffff, 0x00030002,
1145 0x3c078, 0xffffffff, 0x00040007,
1146 0x3c07c, 0xffffffff, 0x00060005,
1147 0x3c080, 0xffffffff, 0x00090008,
1148 0x3c084, 0xffffffff, 0x00010000,
1149 0x3c088, 0xffffffff, 0x00030002,
1150 0x3c08c, 0xffffffff, 0x00040007,
1151 0x3c090, 0xffffffff, 0x00060005,
1152 0x3c094, 0xffffffff, 0x00090008,
1153 0x3c098, 0xffffffff, 0x00010000,
1154 0x3c09c, 0xffffffff, 0x00030002,
1155 0x3c0a0, 0xffffffff, 0x00040007,
1156 0x3c0a4, 0xffffffff, 0x00060005,
1157 0x3c0a8, 0xffffffff, 0x00090008,
1158 0x3c0ac, 0xffffffff, 0x00010000,
1159 0x3c0b0, 0xffffffff, 0x00030002,
1160 0x3c0b4, 0xffffffff, 0x00040007,
1161 0x3c0b8, 0xffffffff, 0x00060005,
1162 0x3c0bc, 0xffffffff, 0x00090008,
1163 0x3c000, 0xffffffff, 0x96e00200,
1164 0x8708, 0xffffffff, 0x00900100,
1165 0xc424, 0xffffffff, 0x0020003f,
1166 0x38, 0xffffffff, 0x0140001c,
1167 0x3c, 0x000f0000, 0x000f0000,
1168 0x220, 0xffffffff, 0xC060000C,
1169 0x224, 0xc0000fff, 0x00000100,
1170 0xf90, 0xffffffff, 0x00000100,
1171 0xf98, 0x00000101, 0x00000000,
1172 0x20a8, 0xffffffff, 0x00000104,
1173 0x55e4, 0xff000fff, 0x00000100,
1174 0x30cc, 0xc0000fff, 0x00000104,
1175 0xc1e4, 0x00000001, 0x00000001,
1176 0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
1196 0x55e4, 0xff607fff, 0xfc000100,
1197 0x3c220, 0xff000fff, 0x00000100,
1198 0x3c224, 0xff000fff, 0x00000100,
1199 0x3c200, 0xfffc0fff, 0x00000100,
1200 0x6ed8, 0x00010101, 0x00010000,
1201 0x9830, 0xffffffff, 0x00000000,
1202 0x9834, 0xf00fffff, 0x00000400,
1203 0x5bb0, 0x000000f0, 0x00000070,
1204 0x5bc0, 0xf0311fff, 0x80300000,
1205 0x98f8, 0x73773777, 0x12010001,
1206 0x98fc, 0xffffffff, 0x00000010,
1207 0x9b7c, 0x00ff0000, 0x00fc0000,
1208 0x8030, 0x00001f0f, 0x0000100a,
1209 0x2f48, 0x73773777, 0x12010001,
1210 0x2408, 0x000fffff, 0x000c007f,
1211 0x8a14, 0xf000003f, 0x00000007,
1212 0x8b24, 0x3fff3fff, 0x00ffcfff,
1213 0x30a04, 0x0000ff0f, 0x00000000,
1214 0x28a4c, 0x07ffffff, 0x06000000,
1215 0x4d8, 0x00000fff, 0x00000100,
1216 0x3e78, 0x00000001, 0x00000002,
1217 0xc768, 0x00000008, 0x00000008,
1218 0x8c00, 0x000000ff, 0x00000003,
1219 0x214f8, 0x01ff01ff, 0x00000002,
1220 0x21498, 0x007ff800, 0x00200000,
1221 0x2015c, 0xffffffff, 0x00000f40,
1222 0x88c4, 0x001f3ae3, 0x00000082,
1223 0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
1230 0x30800, 0xffffffff, 0xe0000000,
1231 0x3c2a0, 0xffffffff, 0x00000100,
1232 0x3c208, 0xffffffff, 0x00000100,
1233 0x3c2c0, 0xffffffff, 0x00000100,
1234 0x3c2c8, 0xffffffff, 0x00000100,
1235 0x3c2c4, 0xffffffff, 0x00000100,
1236 0x55e4, 0xffffffff, 0x00600100,
1237 0x3c280, 0xffffffff, 0x00000100,
1238 0x3c214, 0xffffffff, 0x06000100,
1239 0x3c220, 0xffffffff, 0x00000100,
1240 0x3c218, 0xffffffff, 0x06000100,
1241 0x3c204, 0xffffffff, 0x00000100,
1242 0x3c2e0, 0xffffffff, 0x00000100,
1243 0x3c224, 0xffffffff, 0x00000100,
1244 0x3c200, 0xffffffff, 0x00000100,
1245 0x3c230, 0xffffffff, 0x00000100,
1246 0x3c234, 0xffffffff, 0x00000100,
1247 0x3c250, 0xffffffff, 0x00000100,
1248 0x3c254, 0xffffffff, 0x00000100,
1249 0x3c258, 0xffffffff, 0x00000100,
1250 0x3c25c, 0xffffffff, 0x00000100,
1251 0x3c260, 0xffffffff, 0x00000100,
1252 0x3c27c, 0xffffffff, 0x00000100,
1253 0x3c278, 0xffffffff, 0x00000100,
1254 0x3c210, 0xffffffff, 0x06000100,
1255 0x3c290, 0xffffffff, 0x00000100,
1256 0x3c274, 0xffffffff, 0x00000100,
1257 0x3c2b4, 0xffffffff, 0x00000100,
1258 0x3c2b0, 0xffffffff, 0x00000100,
1259 0x3c270, 0xffffffff, 0x00000100,
1260 0x30800, 0xffffffff, 0xe0000000,
1261 0x3c020, 0xffffffff, 0x00010000,
1262 0x3c024, 0xffffffff, 0x00030002,
1263 0x3c028, 0xffffffff, 0x00040007,
1264 0x3c02c, 0xffffffff, 0x00060005,
1265 0x3c030, 0xffffffff, 0x00090008,
1266 0x3c034, 0xffffffff, 0x00010000,
1267 0x3c038, 0xffffffff, 0x00030002,
1268 0x3c03c, 0xffffffff, 0x00040007,
1269 0x3c040, 0xffffffff, 0x00060005,
1270 0x3c044, 0xffffffff, 0x00090008,
1271 0x3c000, 0xffffffff, 0x96e00200,
1272 0x8708, 0xffffffff, 0x00900100,
1273 0xc424, 0xffffffff, 0x0020003f,
1274 0x38, 0xffffffff, 0x0140001c,
1275 0x3c, 0x000f0000, 0x000f0000,
1276 0x220, 0xffffffff, 0xC060000C,
1277 0x224, 0xc0000fff, 0x00000100,
1278 0x20a8, 0xffffffff, 0x00000104,
1279 0x55e4, 0xff000fff, 0x00000100,
1280 0x30cc, 0xc0000fff, 0x00000104,
1281 0xc1e4, 0x00000001, 0x00000001,
1282 0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}
/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
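
/*
 * Each pair above is an {index, data} tuple for the MC IO debug
 * interface: ci_mc_load_microcode() below writes the first word to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 */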
/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));

	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
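
/*
 * Typical usage (an illustration, not a new helper): callers bracket
 * accesses to VMID- or queue-instanced registers and then restore the
 * default instance, e.g.
 *
 *	cik_srbm_select(rdev, 0, 0, 0, vmid);
 *	... program per-VMID registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */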
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
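
/*
 * Summary of the MC ucode load sequence above (descriptive only):
 * optionally blackout the MC, put the sequencer into reset/writable
 * mode, program the IO debug register pairs, stream the big-endian
 * ucode words into MC_SEQ_SUP_PGM, re-activate the sequencer, then
 * poll MC_SEQ_TRAIN_WAKEUP_CNTL until both training-done bits are set.
 */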
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
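
/*
 * Note: the "radeon/%s_*.bin" names requested above correspond to the
 * MODULE_FIRMWARE() declarations at the top of this file, so the runtime
 * firmware loader and module tooling stay in sync when chips are added.
 */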
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
1752 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1753 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1754 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1755 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1758 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1760 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1761 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1764 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1765 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1766 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1767 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1770 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1771 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1772 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1773 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1776 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1777 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1778 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1779 TILE_SPLIT(split_equal_to_row_size
));
1782 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1786 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1787 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1788 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1789 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1792 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1793 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1794 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1795 TILE_SPLIT(split_equal_to_row_size
));
1798 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
1799 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
));
1802 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1803 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
1806 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1807 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1808 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1812 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1813 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1814 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1818 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1819 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1820 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1824 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1825 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
1828 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1829 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1830 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1831 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1834 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1835 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1836 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1840 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1841 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1842 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1846 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1847 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
1850 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1851 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1852 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1856 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1857 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1858 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1862 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1863 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1864 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1871 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1872 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1874 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1875 switch (reg_offset
) {
1877 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1878 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1879 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1880 NUM_BANKS(ADDR_SURF_16_BANK
));
1883 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1884 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1885 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1886 NUM_BANKS(ADDR_SURF_16_BANK
));
1889 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1890 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1891 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1892 NUM_BANKS(ADDR_SURF_16_BANK
));
1895 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1898 NUM_BANKS(ADDR_SURF_16_BANK
));
1901 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1902 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1903 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1904 NUM_BANKS(ADDR_SURF_8_BANK
));
1907 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1908 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1909 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1910 NUM_BANKS(ADDR_SURF_4_BANK
));
1913 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1914 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1915 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1916 NUM_BANKS(ADDR_SURF_2_BANK
));
1919 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1922 NUM_BANKS(ADDR_SURF_16_BANK
));
1925 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1928 NUM_BANKS(ADDR_SURF_16_BANK
));
1931 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1934 NUM_BANKS(ADDR_SURF_16_BANK
));
1937 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1940 NUM_BANKS(ADDR_SURF_16_BANK
));
1943 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1946 NUM_BANKS(ADDR_SURF_8_BANK
));
1949 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1952 NUM_BANKS(ADDR_SURF_4_BANK
));
1955 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1958 NUM_BANKS(ADDR_SURF_2_BANK
));
1964 WREG32(GB_MACROTILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1966 } else if (num_pipe_configs
== 4) {
1968 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1969 switch (reg_offset
) {
1971 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1972 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1973 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1974 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1977 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1979 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1983 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1985 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1986 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1989 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1991 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1995 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1997 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1998 TILE_SPLIT(split_equal_to_row_size
));
2001 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
2005 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
2007 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
2011 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
2013 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2014 TILE_SPLIT(split_equal_to_row_size
));
2017 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
2018 PIPE_CONFIG(ADDR_SURF_P4_16x16
));
2021 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
2025 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2031 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
2033 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
2034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2037 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2038 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
2039 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2043 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
2047 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
2049 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2053 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
2055 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
2056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2059 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
2061 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2065 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
2069 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
2071 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2075 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
2077 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2081 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
2083 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2090 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
2091 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
    } else if (num_rbs < 4) {
        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
            switch (reg_offset) {
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(split_equal_to_row_size));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(split_equal_to_row_size));
            gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | PIPE_CONFIG(ADDR_SURF_P4_8x16));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            }
            rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
            WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
        }
        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
            switch (reg_offset) {
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | NUM_BANKS(ADDR_SURF_4_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | NUM_BANKS(ADDR_SURF_4_BANK));
            }
            WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
        }
    } else if (num_pipe_configs == 2) {
        for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
            switch (reg_offset) {
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(split_equal_to_row_size));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(split_equal_to_row_size));
            gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
            }
            rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
            WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
        }
        for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
            switch (reg_offset) {
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
            gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
            }
            WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
        }
    } else
        DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}
/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
                             u32 se_num, u32 sh_num)
{
    u32 data = INSTANCE_BROADCAST_WRITES;

    if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
        data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
    else if (se_num == 0xffffffff)
        data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
    else if (sh_num == 0xffffffff)
        data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
    else
        data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
    WREG32(GRBM_GFX_INDEX, data);
}
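/*
 * Callers bracket instanced register access with this helper: select one
 * (se, sh) pair, program the per-instance registers, then restore broadcast
 * mode with cik_select_se_sh(rdev, 0xffffffff, 0xffffffff), as cik_setup_rb()
 * below does.
 */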
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
    u32 i, mask = 0;

    for (i = 0; i < bit_width; i++) {
        mask <<= 1;
        mask |= 1;
    }
    return mask;
}
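/* For example, a bit_width of 4 yields the mask 0xf; cik_get_rb_disabled()
 * below sizes the mask as max_rb_num / se_num / sh_per_se, i.e. one bit per
 * render backend within a single SH.
 */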
/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
                               u32 max_rb_num, u32 se_num,
                               u32 sh_per_se)
{
    u32 data, mask;

    data = RREG32(CC_RB_BACKEND_DISABLE);
    if (data & 1)
        data &= BACKEND_DISABLE_MASK;
    else
        data = 0;
    data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

    data >>= BACKEND_DISABLE_SHIFT;

    mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

    return data & mask;
}
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
                         u32 se_num, u32 sh_per_se,
                         u32 max_rb_num)
{
    int i, j;
    u32 data, mask;
    u32 disabled_rbs = 0;
    u32 enabled_rbs = 0;

    for (i = 0; i < se_num; i++) {
        for (j = 0; j < sh_per_se; j++) {
            cik_select_se_sh(rdev, i, j);
            data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
            disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
        }
    }
    cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

    mask = 1;
    for (i = 0; i < max_rb_num; i++) {
        if (!(disabled_rbs & mask))
            enabled_rbs |= mask;
        mask <<= 1;
    }

    for (i = 0; i < se_num; i++) {
        cik_select_se_sh(rdev, i, 0xffffffff);
        data = 0;
        for (j = 0; j < sh_per_se; j++) {
            switch (enabled_rbs & 3) {
            case 1:
                data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
                break;
            case 2:
                data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
                break;
            case 3:
            default:
                data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
                break;
            }
            enabled_rbs >>= 2;
        }
        WREG32(PA_SC_RASTER_CONFIG, data);
    }
    cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
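/* Each (se, sh) pair contributes CIK_RB_BITMAP_WIDTH_PER_SH bits to
 * disabled_rbs, and enabled_rbs is then consumed two bits at a time while
 * filling the RB_MAP fields of PA_SC_RASTER_CONFIG above.
 */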
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
    u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
    u32 mc_shared_chmap, mc_arb_ramcfg;
    u32 hdp_host_path_cntl;
    u32 tmp;
    int i, j;

    switch (rdev->family) {
    case CHIP_BONAIRE:
        rdev->config.cik.max_shader_engines = 2;
        rdev->config.cik.max_tile_pipes = 4;
        rdev->config.cik.max_cu_per_sh = 7;
        rdev->config.cik.max_sh_per_se = 1;
        rdev->config.cik.max_backends_per_se = 2;
        rdev->config.cik.max_texture_channel_caches = 4;
        rdev->config.cik.max_gprs = 256;
        rdev->config.cik.max_gs_threads = 32;
        rdev->config.cik.max_hw_contexts = 8;

        rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
        rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
        rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
        rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
        break;
    case CHIP_KABINI:
    default:
        rdev->config.cik.max_shader_engines = 1;
        rdev->config.cik.max_tile_pipes = 2;
        rdev->config.cik.max_cu_per_sh = 2;
        rdev->config.cik.max_sh_per_se = 1;
        rdev->config.cik.max_backends_per_se = 1;
        rdev->config.cik.max_texture_channel_caches = 2;
        rdev->config.cik.max_gprs = 256;
        rdev->config.cik.max_gs_threads = 16;
        rdev->config.cik.max_hw_contexts = 8;

        rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
        rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
        rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
        rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
        gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
        break;
    }
    /* Initialize HDP */
    for (i = 0, j = 0; i < 32; i++, j += 0x18) {
        WREG32((0x2c14 + j), 0x00000000);
        WREG32((0x2c18 + j), 0x00000000);
        WREG32((0x2c1c + j), 0x00000000);
        WREG32((0x2c20 + j), 0x00000000);
        WREG32((0x2c24 + j), 0x00000000);
    }

    WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

    WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

    mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
    mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

    rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
    rdev->config.cik.mem_max_burst_length_bytes = 256;
    tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
    rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
    if (rdev->config.cik.mem_row_size_in_kb > 4)
        rdev->config.cik.mem_row_size_in_kb = 4;
    /* XXX use MC settings? */
    rdev->config.cik.shader_engine_tile_size = 32;
    rdev->config.cik.num_gpus = 1;
    rdev->config.cik.multi_gpu_tile_size = 64;

    /* fix up row size */
    gb_addr_config &= ~ROW_SIZE_MASK;
    switch (rdev->config.cik.mem_row_size_in_kb) {
    case 1:
    default:
        gb_addr_config |= ROW_SIZE(0);
        break;
    case 2:
        gb_addr_config |= ROW_SIZE(1);
        break;
    case 4:
        gb_addr_config |= ROW_SIZE(2);
        break;
    }
    /* setup tiling info dword.  gb_addr_config is not adequate since it does
     * not have bank info, so create a custom tiling dword.
     * bits 3:0   num_pipes
     * bits 7:4   num_banks
     * bits 11:8  group_size
     * bits 15:12 row_size
     */
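    /* Example: bits 3:0 encode the pipe count (see the switch below), bit 4 is
     * set when the NOOFBANK field of MC_ARB_RAMCFG is non-zero, and bits 11:8
     * and 15:12 are copied out of gb_addr_config (pipe interleave and row
     * size).
     */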
    rdev->config.cik.tile_config = 0;
    switch (rdev->config.cik.num_tile_pipes) {
    case 1:
        rdev->config.cik.tile_config |= (0 << 0);
        break;
    case 2:
        rdev->config.cik.tile_config |= (1 << 0);
        break;
    case 4:
        rdev->config.cik.tile_config |= (2 << 0);
        break;
    case 8:
    default:
        /* XXX what about 12? */
        rdev->config.cik.tile_config |= (3 << 0);
        break;
    }
    if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
        rdev->config.cik.tile_config |= 1 << 4;
    else
        rdev->config.cik.tile_config |= 0 << 4;
    rdev->config.cik.tile_config |=
        ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
    rdev->config.cik.tile_config |=
        ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

    WREG32(GB_ADDR_CONFIG, gb_addr_config);
    WREG32(HDP_ADDR_CONFIG, gb_addr_config);
    WREG32(DMIF_ADDR_CALC, gb_addr_config);
    WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
    WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
    WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
    WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
    WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

    cik_tiling_mode_table_init(rdev);

    cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
                 rdev->config.cik.max_sh_per_se,
                 rdev->config.cik.max_backends_per_se);
    /* set HW defaults for 3D engine */
    WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

    WREG32(SX_DEBUG_1, 0x20);

    WREG32(TA_CNTL_AUX, 0x00010000);

    tmp = RREG32(SPI_CONFIG_CNTL);
    WREG32(SPI_CONFIG_CNTL, tmp);

    WREG32(SQ_CONFIG, 1);

    WREG32(DB_DEBUG, 0);

    tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
    WREG32(DB_DEBUG2, tmp);

    tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
    WREG32(DB_DEBUG3, tmp);

    tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
    WREG32(CB_HW_CONTROL, tmp);

    WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

    WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
                 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
                 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
                 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

    WREG32(VGT_NUM_INSTANCES, 1);

    WREG32(CP_PERFMON_CNTL, 0);

    WREG32(SQ_CONFIG, 0);

    WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
                      FORCE_EOV_MAX_REZ_CNT(255)));

    WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
           AUTO_INVLD_EN(ES_AND_GS_AUTO));

    WREG32(VGT_GS_VERTEX_REUSE, 16);
    WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

    tmp = RREG32(HDP_MISC_CNTL);
    tmp |= HDP_FLUSH_INVALIDATE_CACHE;
    WREG32(HDP_MISC_CNTL, tmp);

    hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
    WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

    WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
    WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
}
/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+). On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
    int i;

    rdev->scratch.num_reg = 7;
    rdev->scratch.reg_base = SCRATCH_REG0;
    for (i = 0; i < rdev->scratch.num_reg; i++) {
        rdev->scratch.free[i] = true;
        rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
    }
}
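/* With SCRATCH_REG0 as the base this hands out seven consecutive 32-bit
 * registers, SCRATCH_REG0 through SCRATCH_REG0 + 0x18, in 4-byte steps.
 */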
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
    uint32_t scratch;
    uint32_t tmp = 0;
    unsigned i;
    int r;

    r = radeon_scratch_get(rdev, &scratch);
    if (r) {
        DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
        return r;
    }
    WREG32(scratch, 0xCAFEDEAD);
    r = radeon_ring_lock(rdev, ring, 3);
    if (r) {
        DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
        radeon_scratch_free(rdev, scratch);
        return r;
    }
    radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
    radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
    radeon_ring_write(ring, 0xDEADBEEF);
    radeon_ring_unlock_commit(rdev, ring);

    for (i = 0; i < rdev->usec_timeout; i++) {
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF)
            break;
        DRM_UDELAY(1);
    }
    if (i < rdev->usec_timeout) {
        DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
    } else {
        DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
              ring->idx, scratch, tmp);
        r = -EINVAL;
    }
    radeon_scratch_free(rdev, scratch);
    return r;
}
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
                             struct radeon_fence *fence)
{
    struct radeon_ring *ring = &rdev->ring[fence->ring];
    u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

    /* EVENT_WRITE_EOP - flush caches, send int */
    radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
    radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                 EOP_TC_ACTION_EN |
                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                 EVENT_INDEX(5)));
    radeon_ring_write(ring, addr & 0xfffffffc);
    radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
    radeon_ring_write(ring, fence->seq);
    radeon_ring_write(ring, 0);

    /* We should be using the new WAIT_REG_MEM special op packet here
     * but it causes the CP to hang
     */
    radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
    radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                 WRITE_DATA_DST_SEL(0)));
    radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
    radeon_ring_write(ring, 0);
    radeon_ring_write(ring, 0);
}
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
                                 struct radeon_fence *fence)
{
    struct radeon_ring *ring = &rdev->ring[fence->ring];
    u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

    /* RELEASE_MEM - flush caches, send int */
    radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
    radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
                 EOP_TC_ACTION_EN |
                 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
                 EVENT_INDEX(5)));
    radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
    radeon_ring_write(ring, addr & 0xfffffffc);
    radeon_ring_write(ring, upper_32_bits(addr));
    radeon_ring_write(ring, fence->seq);
    radeon_ring_write(ring, 0);

    /* We should be using the new WAIT_REG_MEM special op packet here
     * but it causes the CP to hang
     */
    radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
    radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
                 WRITE_DATA_DST_SEL(0)));
    radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
    radeon_ring_write(ring, 0);
    radeon_ring_write(ring, 0);
}
void cik_semaphore_ring_emit(struct radeon_device *rdev,
                             struct radeon_ring *ring,
                             struct radeon_semaphore *semaphore,
                             bool emit_wait)
{
    uint64_t addr = semaphore->gpu_addr;
    unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

    radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
    radeon_ring_write(ring, addr & 0xffffffff);
    radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
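/* The MEM_SEMAPHORE packet above carries the 64-bit semaphore address (low
 * dword, then the upper 16 bits) with the wait/signal select OR'd into the
 * high dword.
 */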
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
    struct radeon_ring *ring = &rdev->ring[ib->ring];
    u32 header, control = INDIRECT_BUFFER_VALID;

    if (ib->is_const_ib) {
        /* set switch buffer packet before const IB */
        radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
        radeon_ring_write(ring, 0);

        header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
    } else {
        u32 next_rptr;
        if (ring->rptr_save_reg) {
            next_rptr = ring->wptr + 3 + 4;
            radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
            radeon_ring_write(ring, ((ring->rptr_save_reg -
                          PACKET3_SET_UCONFIG_REG_START) >> 2));
            radeon_ring_write(ring, next_rptr);
        } else if (rdev->wb.enabled) {
            next_rptr = ring->wptr + 5 + 4;
            radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
            radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
            radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
            radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
            radeon_ring_write(ring, next_rptr);
        }

        header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
    }

    control |= ib->length_dw |
        (ib->vm ? (ib->vm->id << 24) : 0);

    radeon_ring_write(ring, header);
    radeon_ring_write(ring,
              (ib->gpu_addr & 0xFFFFFFFC));
    radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
    radeon_ring_write(ring, control);
}
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
    struct radeon_ib ib;
    uint32_t scratch;
    uint32_t tmp = 0;
    unsigned i;
    int r;

    r = radeon_scratch_get(rdev, &scratch);
    if (r) {
        DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
        return r;
    }
    WREG32(scratch, 0xCAFEDEAD);
    r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
    if (r) {
        DRM_ERROR("radeon: failed to get ib (%d).\n", r);
        return r;
    }
    ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
    ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
    ib.ptr[2] = 0xDEADBEEF;
    ib.length_dw = 3;
    r = radeon_ib_schedule(rdev, &ib, NULL);
    if (r) {
        radeon_scratch_free(rdev, scratch);
        radeon_ib_free(rdev, &ib);
        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
        return r;
    }
    r = radeon_fence_wait(ib.fence, false);
    if (r) {
        DRM_ERROR("radeon: fence wait failed (%d).\n", r);
        return r;
    }
    for (i = 0; i < rdev->usec_timeout; i++) {
        tmp = RREG32(scratch);
        if (tmp == 0xDEADBEEF)
            break;
        DRM_UDELAY(1);
    }
    if (i < rdev->usec_timeout) {
        DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
    } else {
        DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
              scratch, tmp);
        r = -EINVAL;
    }
    radeon_scratch_free(rdev, scratch);
    radeon_ib_free(rdev, &ib);
    return r;
}
/*
 * On CIK, gfx and compute now have independent command processors.
 *
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
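/* Concretely, a two-MEC part such as Kaveri exposes up to 2 * 4 * 8 = 64
 * compute queues, and a single-MEC part exposes 32; see cik_mec_init() later
 * in this file.
 */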
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
    if (enable)
        WREG32(CP_ME_CNTL, 0);
    else {
        WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
    }
}
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
    const __be32 *fw_data;
    int i;

    if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
        return -EINVAL;

    cik_cp_gfx_enable(rdev, false);

    /* PFP */
    fw_data = (const __be32 *)rdev->pfp_fw->data;
    WREG32(CP_PFP_UCODE_ADDR, 0);
    for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
        WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
    WREG32(CP_PFP_UCODE_ADDR, 0);

    /* CE */
    fw_data = (const __be32 *)rdev->ce_fw->data;
    WREG32(CP_CE_UCODE_ADDR, 0);
    for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
        WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
    WREG32(CP_CE_UCODE_ADDR, 0);

    /* ME */
    fw_data = (const __be32 *)rdev->me_fw->data;
    WREG32(CP_ME_RAM_WADDR, 0);
    for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
        WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
    WREG32(CP_ME_RAM_WADDR, 0);

    WREG32(CP_PFP_UCODE_ADDR, 0);
    WREG32(CP_CE_UCODE_ADDR, 0);
    WREG32(CP_ME_RAM_WADDR, 0);
    WREG32(CP_ME_RAM_RADDR, 0);
    return 0;
}
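/* Each engine is loaded the same way: zero its ucode address register, stream
 * the big-endian firmware words through the data register, then zero the
 * address registers again before the CP is restarted.
 */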
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
    struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
    int r, i;

    WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
    WREG32(CP_ENDIAN_SWAP, 0);
    WREG32(CP_DEVICE_ID, 1);

    cik_cp_gfx_enable(rdev, true);

    r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
    if (r) {
        DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
        return r;
    }

    /* init the CE partitions.  CE only used for gfx on CIK */
    radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
    radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
    radeon_ring_write(ring, 0xc000);
    radeon_ring_write(ring, 0xc000);

    /* setup clear context state */
    radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
    radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

    radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
    radeon_ring_write(ring, 0x80000000);
    radeon_ring_write(ring, 0x80000000);

    for (i = 0; i < cik_default_size; i++)
        radeon_ring_write(ring, cik_default_state[i]);

    radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
    radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

    /* set clear context state */
    radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
    radeon_ring_write(ring, 0);

    radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
    radeon_ring_write(ring, 0x00000316);
    radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
    radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

    radeon_ring_unlock_commit(rdev, ring);

    return 0;
}
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * buffer.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
    cik_cp_gfx_enable(rdev, false);
    radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
    struct radeon_ring *ring;
    u32 tmp;
    u32 rb_bufsz;
    u64 rb_addr;
    int r;

    WREG32(CP_SEM_WAIT_TIMER, 0x0);
    WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

    /* Set the write pointer delay */
    WREG32(CP_RB_WPTR_DELAY, 0);

    /* set the RB to use vmid 0 */
    WREG32(CP_RB_VMID, 0);

    WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

    /* ring 0 - compute and gfx */
    /* Set ring buffer size */
    ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
    rb_bufsz = drm_order(ring->ring_size / 8);
    tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
    tmp |= BUF_SWAP_32BIT;
#endif
    WREG32(CP_RB0_CNTL, tmp);

    /* Initialize the ring buffer's read and write pointers */
    WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
    ring->wptr = 0;
    WREG32(CP_RB0_WPTR, ring->wptr);

    /* set the wb address whether it's enabled or not */
    WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
    WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

    /* scratch register shadowing is no longer supported */
    WREG32(SCRATCH_UMSK, 0);

    if (!rdev->wb.enabled)
        tmp |= RB_NO_UPDATE;

    WREG32(CP_RB0_CNTL, tmp);

    rb_addr = ring->gpu_addr >> 8;
    WREG32(CP_RB0_BASE, rb_addr);
    WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

    ring->rptr = RREG32(CP_RB0_RPTR);

    /* start the ring */
    cik_cp_gfx_start(rdev);
    rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
    r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
    if (r) {
        rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
        return r;
    }
    return 0;
}
3400 u32
cik_compute_ring_get_rptr(struct radeon_device
*rdev
,
3401 struct radeon_ring
*ring
)
3407 if (rdev
->wb
.enabled
) {
3408 rptr
= le32_to_cpu(rdev
->wb
.wb
[ring
->rptr_offs
/4]);
3410 mutex_lock(&rdev
->srbm_mutex
);
3411 cik_srbm_select(rdev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3412 rptr
= RREG32(CP_HQD_PQ_RPTR
);
3413 cik_srbm_select(rdev
, 0, 0, 0, 0);
3414 mutex_unlock(&rdev
->srbm_mutex
);
3420 u32
cik_compute_ring_get_wptr(struct radeon_device
*rdev
,
3421 struct radeon_ring
*ring
)
3425 if (rdev
->wb
.enabled
) {
3426 wptr
= le32_to_cpu(rdev
->wb
.wb
[ring
->wptr_offs
/4]);
3428 mutex_lock(&rdev
->srbm_mutex
);
3429 cik_srbm_select(rdev
, ring
->me
, ring
->pipe
, ring
->queue
, 0);
3430 wptr
= RREG32(CP_HQD_PQ_WPTR
);
3431 cik_srbm_select(rdev
, 0, 0, 0, 0);
3432 mutex_unlock(&rdev
->srbm_mutex
);
3438 void cik_compute_ring_set_wptr(struct radeon_device
*rdev
,
3439 struct radeon_ring
*ring
)
3441 rdev
->wb
.wb
[ring
->wptr_offs
/4] = cpu_to_le32(ring
->wptr
);
3442 WDOORBELL32(ring
->doorbell_offset
, ring
->wptr
);
3446 * cik_cp_compute_enable - enable/disable the compute CP MEs
3448 * @rdev: radeon_device pointer
3449 * @enable: enable or disable the MEs
3451 * Halts or unhalts the compute MEs.
3453 static void cik_cp_compute_enable(struct radeon_device
*rdev
, bool enable
)
3456 WREG32(CP_MEC_CNTL
, 0);
3458 WREG32(CP_MEC_CNTL
, (MEC_ME1_HALT
| MEC_ME2_HALT
));
3463 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3465 * @rdev: radeon_device pointer
3467 * Loads the compute MEC1&2 ucode.
3468 * Returns 0 for success, -EINVAL if the ucode is not available.
3470 static int cik_cp_compute_load_microcode(struct radeon_device
*rdev
)
3472 const __be32
*fw_data
;
3478 cik_cp_compute_enable(rdev
, false);
3481 fw_data
= (const __be32
*)rdev
->mec_fw
->data
;
3482 WREG32(CP_MEC_ME1_UCODE_ADDR
, 0);
3483 for (i
= 0; i
< CIK_MEC_UCODE_SIZE
; i
++)
3484 WREG32(CP_MEC_ME1_UCODE_DATA
, be32_to_cpup(fw_data
++));
3485 WREG32(CP_MEC_ME1_UCODE_ADDR
, 0);
3487 if (rdev
->family
== CHIP_KAVERI
) {
3489 fw_data
= (const __be32
*)rdev
->mec_fw
->data
;
3490 WREG32(CP_MEC_ME2_UCODE_ADDR
, 0);
3491 for (i
= 0; i
< CIK_MEC_UCODE_SIZE
; i
++)
3492 WREG32(CP_MEC_ME2_UCODE_DATA
, be32_to_cpup(fw_data
++));
3493 WREG32(CP_MEC_ME2_UCODE_ADDR
, 0);
3500 * cik_cp_compute_start - start the compute queues
3502 * @rdev: radeon_device pointer
3504 * Enable the compute queues.
3505 * Returns 0 for success, error for failure.
3507 static int cik_cp_compute_start(struct radeon_device
*rdev
)
3509 cik_cp_compute_enable(rdev
, true);
3515 * cik_cp_compute_fini - stop the compute queues
3517 * @rdev: radeon_device pointer
3519 * Stop the compute queues and tear down the driver queue
3522 static void cik_cp_compute_fini(struct radeon_device
*rdev
)
3526 cik_cp_compute_enable(rdev
, false);
3528 for (i
= 0; i
< 2; i
++) {
3530 idx
= CAYMAN_RING_TYPE_CP1_INDEX
;
3532 idx
= CAYMAN_RING_TYPE_CP2_INDEX
;
3534 if (rdev
->ring
[idx
].mqd_obj
) {
3535 r
= radeon_bo_reserve(rdev
->ring
[idx
].mqd_obj
, false);
3536 if (unlikely(r
!= 0))
3537 dev_warn(rdev
->dev
, "(%d) reserve MQD bo failed\n", r
);
3539 radeon_bo_unpin(rdev
->ring
[idx
].mqd_obj
);
3540 radeon_bo_unreserve(rdev
->ring
[idx
].mqd_obj
);
3542 radeon_bo_unref(&rdev
->ring
[idx
].mqd_obj
);
3543 rdev
->ring
[idx
].mqd_obj
= NULL
;
3548 static void cik_mec_fini(struct radeon_device
*rdev
)
3552 if (rdev
->mec
.hpd_eop_obj
) {
3553 r
= radeon_bo_reserve(rdev
->mec
.hpd_eop_obj
, false);
3554 if (unlikely(r
!= 0))
3555 dev_warn(rdev
->dev
, "(%d) reserve HPD EOP bo failed\n", r
);
3556 radeon_bo_unpin(rdev
->mec
.hpd_eop_obj
);
3557 radeon_bo_unreserve(rdev
->mec
.hpd_eop_obj
);
3559 radeon_bo_unref(&rdev
->mec
.hpd_eop_obj
);
3560 rdev
->mec
.hpd_eop_obj
= NULL
;
3564 #define MEC_HPD_SIZE 2048
3566 static int cik_mec_init(struct radeon_device
*rdev
)
3572 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3573 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3575 if (rdev
->family
== CHIP_KAVERI
)
3576 rdev
->mec
.num_mec
= 2;
3578 rdev
->mec
.num_mec
= 1;
3579 rdev
->mec
.num_pipe
= 4;
3580 rdev
->mec
.num_queue
= rdev
->mec
.num_mec
* rdev
->mec
.num_pipe
* 8;
3582 if (rdev
->mec
.hpd_eop_obj
== NULL
) {
3583 r
= radeon_bo_create(rdev
,
3584 rdev
->mec
.num_mec
*rdev
->mec
.num_pipe
* MEC_HPD_SIZE
* 2,
3586 RADEON_GEM_DOMAIN_GTT
, NULL
,
3587 &rdev
->mec
.hpd_eop_obj
);
3589 dev_warn(rdev
->dev
, "(%d) create HDP EOP bo failed\n", r
);
3594 r
= radeon_bo_reserve(rdev
->mec
.hpd_eop_obj
, false);
3595 if (unlikely(r
!= 0)) {
3599 r
= radeon_bo_pin(rdev
->mec
.hpd_eop_obj
, RADEON_GEM_DOMAIN_GTT
,
3600 &rdev
->mec
.hpd_eop_gpu_addr
);
3602 dev_warn(rdev
->dev
, "(%d) pin HDP EOP bo failed\n", r
);
3606 r
= radeon_bo_kmap(rdev
->mec
.hpd_eop_obj
, (void **)&hpd
);
3608 dev_warn(rdev
->dev
, "(%d) map HDP EOP bo failed\n", r
);
3613 /* clear memory. Not sure if this is required or not */
3614 memset(hpd
, 0, rdev
->mec
.num_mec
*rdev
->mec
.num_pipe
* MEC_HPD_SIZE
* 2);
3616 radeon_bo_kunmap(rdev
->mec
.hpd_eop_obj
);
3617 radeon_bo_unreserve(rdev
->mec
.hpd_eop_obj
);
3622 struct hqd_registers
3624 u32 cp_mqd_base_addr
;
3625 u32 cp_mqd_base_addr_hi
;
3628 u32 cp_hqd_persistent_state
;
3629 u32 cp_hqd_pipe_priority
;
3630 u32 cp_hqd_queue_priority
;
3633 u32 cp_hqd_pq_base_hi
;
3635 u32 cp_hqd_pq_rptr_report_addr
;
3636 u32 cp_hqd_pq_rptr_report_addr_hi
;
3637 u32 cp_hqd_pq_wptr_poll_addr
;
3638 u32 cp_hqd_pq_wptr_poll_addr_hi
;
3639 u32 cp_hqd_pq_doorbell_control
;
3641 u32 cp_hqd_pq_control
;
3642 u32 cp_hqd_ib_base_addr
;
3643 u32 cp_hqd_ib_base_addr_hi
;
3645 u32 cp_hqd_ib_control
;
3646 u32 cp_hqd_iq_timer
;
3648 u32 cp_hqd_dequeue_request
;
3649 u32 cp_hqd_dma_offload
;
3650 u32 cp_hqd_sema_cmd
;
3651 u32 cp_hqd_msg_type
;
3652 u32 cp_hqd_atomic0_preop_lo
;
3653 u32 cp_hqd_atomic0_preop_hi
;
3654 u32 cp_hqd_atomic1_preop_lo
;
3655 u32 cp_hqd_atomic1_preop_hi
;
3656 u32 cp_hqd_hq_scheduler0
;
3657 u32 cp_hqd_hq_scheduler1
;
3664 u32 dispatch_initiator
;
3668 u32 pipeline_stat_enable
;
3669 u32 perf_counter_enable
;
3675 u32 resource_limits
;
3676 u32 static_thread_mgmt01
[2];
3678 u32 static_thread_mgmt23
[2];
3680 u32 thread_trace_enable
;
3683 u32 vgtcs_invoke_count
[2];
3684 struct hqd_registers queue_state
;
3686 u32 interrupt_queue
[64];
3690 * cik_cp_compute_resume - setup the compute queue registers
3692 * @rdev: radeon_device pointer
3694 * Program the compute queues and test them to make sure they
3696 * Returns 0 for success, error for failure.
3698 static int cik_cp_compute_resume(struct radeon_device
*rdev
)
3702 bool use_doorbell
= true;
3708 struct bonaire_mqd
*mqd
;
3710 r
= cik_cp_compute_start(rdev
);
3714 /* fix up chicken bits */
3715 tmp
= RREG32(CP_CPF_DEBUG
);
3717 WREG32(CP_CPF_DEBUG
, tmp
);
3719 /* init the pipes */
3720 mutex_lock(&rdev
->srbm_mutex
);
3721 for (i
= 0; i
< (rdev
->mec
.num_pipe
* rdev
->mec
.num_mec
); i
++) {
3722 int me
= (i
< 4) ? 1 : 2;
3723 int pipe
= (i
< 4) ? i
: (i
- 4);
3725 eop_gpu_addr
= rdev
->mec
.hpd_eop_gpu_addr
+ (i
* MEC_HPD_SIZE
* 2);
3727 cik_srbm_select(rdev
, me
, pipe
, 0, 0);
3729 /* write the EOP addr */
3730 WREG32(CP_HPD_EOP_BASE_ADDR
, eop_gpu_addr
>> 8);
3731 WREG32(CP_HPD_EOP_BASE_ADDR_HI
, upper_32_bits(eop_gpu_addr
) >> 8);
3733 /* set the VMID assigned */
3734 WREG32(CP_HPD_EOP_VMID
, 0);
3736 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3737 tmp
= RREG32(CP_HPD_EOP_CONTROL
);
3738 tmp
&= ~EOP_SIZE_MASK
;
3739 tmp
|= drm_order(MEC_HPD_SIZE
/ 8);
3740 WREG32(CP_HPD_EOP_CONTROL
, tmp
);
3742 cik_srbm_select(rdev
, 0, 0, 0, 0);
3743 mutex_unlock(&rdev
->srbm_mutex
);
3745 /* init the queues. Just two for now. */
3746 for (i
= 0; i
< 2; i
++) {
3748 idx
= CAYMAN_RING_TYPE_CP1_INDEX
;
3750 idx
= CAYMAN_RING_TYPE_CP2_INDEX
;
3752 if (rdev
->ring
[idx
].mqd_obj
== NULL
) {
3753 r
= radeon_bo_create(rdev
,
3754 sizeof(struct bonaire_mqd
),
3756 RADEON_GEM_DOMAIN_GTT
, NULL
,
3757 &rdev
->ring
[idx
].mqd_obj
);
3759 dev_warn(rdev
->dev
, "(%d) create MQD bo failed\n", r
);
3764 r
= radeon_bo_reserve(rdev
->ring
[idx
].mqd_obj
, false);
3765 if (unlikely(r
!= 0)) {
3766 cik_cp_compute_fini(rdev
);
3769 r
= radeon_bo_pin(rdev
->ring
[idx
].mqd_obj
, RADEON_GEM_DOMAIN_GTT
,
3772 dev_warn(rdev
->dev
, "(%d) pin MQD bo failed\n", r
);
3773 cik_cp_compute_fini(rdev
);
3776 r
= radeon_bo_kmap(rdev
->ring
[idx
].mqd_obj
, (void **)&buf
);
3778 dev_warn(rdev
->dev
, "(%d) map MQD bo failed\n", r
);
3779 cik_cp_compute_fini(rdev
);
3783 /* doorbell offset */
3784 rdev
->ring
[idx
].doorbell_offset
=
3785 (rdev
->ring
[idx
].doorbell_page_num
* PAGE_SIZE
) + 0;
3787 /* init the mqd struct */
3788 memset(buf
, 0, sizeof(struct bonaire_mqd
));
3790 mqd
= (struct bonaire_mqd
*)buf
;
3791 mqd
->header
= 0xC0310800;
3792 mqd
->static_thread_mgmt01
[0] = 0xffffffff;
3793 mqd
->static_thread_mgmt01
[1] = 0xffffffff;
3794 mqd
->static_thread_mgmt23
[0] = 0xffffffff;
3795 mqd
->static_thread_mgmt23
[1] = 0xffffffff;
3797 mutex_lock(&rdev
->srbm_mutex
);
3798 cik_srbm_select(rdev
, rdev
->ring
[idx
].me
,
3799 rdev
->ring
[idx
].pipe
,
3800 rdev
->ring
[idx
].queue
, 0);
3802 /* disable wptr polling */
3803 tmp
= RREG32(CP_PQ_WPTR_POLL_CNTL
);
3804 tmp
&= ~WPTR_POLL_EN
;
3805 WREG32(CP_PQ_WPTR_POLL_CNTL
, tmp
);
3807 /* enable doorbell? */
3808 mqd
->queue_state
.cp_hqd_pq_doorbell_control
=
3809 RREG32(CP_HQD_PQ_DOORBELL_CONTROL
);
3811 mqd
->queue_state
.cp_hqd_pq_doorbell_control
|= DOORBELL_EN
;
3813 mqd
->queue_state
.cp_hqd_pq_doorbell_control
&= ~DOORBELL_EN
;
3814 WREG32(CP_HQD_PQ_DOORBELL_CONTROL
,
3815 mqd
->queue_state
.cp_hqd_pq_doorbell_control
);
3817 /* disable the queue if it's active */
3818 mqd
->queue_state
.cp_hqd_dequeue_request
= 0;
3819 mqd
->queue_state
.cp_hqd_pq_rptr
= 0;
3820 mqd
->queue_state
.cp_hqd_pq_wptr
= 0;
3821 if (RREG32(CP_HQD_ACTIVE
) & 1) {
3822 WREG32(CP_HQD_DEQUEUE_REQUEST
, 1);
3823 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
3824 if (!(RREG32(CP_HQD_ACTIVE
) & 1))
3828 WREG32(CP_HQD_DEQUEUE_REQUEST
, mqd
->queue_state
.cp_hqd_dequeue_request
);
3829 WREG32(CP_HQD_PQ_RPTR
, mqd
->queue_state
.cp_hqd_pq_rptr
);
3830 WREG32(CP_HQD_PQ_WPTR
, mqd
->queue_state
.cp_hqd_pq_wptr
);
3833 /* set the pointer to the MQD */
3834 mqd
->queue_state
.cp_mqd_base_addr
= mqd_gpu_addr
& 0xfffffffc;
3835 mqd
->queue_state
.cp_mqd_base_addr_hi
= upper_32_bits(mqd_gpu_addr
);
3836 WREG32(CP_MQD_BASE_ADDR
, mqd
->queue_state
.cp_mqd_base_addr
);
3837 WREG32(CP_MQD_BASE_ADDR_HI
, mqd
->queue_state
.cp_mqd_base_addr_hi
);
3838 /* set MQD vmid to 0 */
3839 mqd
->queue_state
.cp_mqd_control
= RREG32(CP_MQD_CONTROL
);
3840 mqd
->queue_state
.cp_mqd_control
&= ~MQD_VMID_MASK
;
3841 WREG32(CP_MQD_CONTROL
, mqd
->queue_state
.cp_mqd_control
);
3843 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
3844 hqd_gpu_addr
= rdev
->ring
[idx
].gpu_addr
>> 8;
3845 mqd
->queue_state
.cp_hqd_pq_base
= hqd_gpu_addr
;
3846 mqd
->queue_state
.cp_hqd_pq_base_hi
= upper_32_bits(hqd_gpu_addr
);
3847 WREG32(CP_HQD_PQ_BASE
, mqd
->queue_state
.cp_hqd_pq_base
);
3848 WREG32(CP_HQD_PQ_BASE_HI
, mqd
->queue_state
.cp_hqd_pq_base_hi
);
3850 /* set up the HQD, this is similar to CP_RB0_CNTL */
3851 mqd
->queue_state
.cp_hqd_pq_control
= RREG32(CP_HQD_PQ_CONTROL
);
3852 mqd
->queue_state
.cp_hqd_pq_control
&=
3853 ~(QUEUE_SIZE_MASK
| RPTR_BLOCK_SIZE_MASK
);
3855 mqd
->queue_state
.cp_hqd_pq_control
|=
3856 drm_order(rdev
->ring
[idx
].ring_size
/ 8);
3857 mqd
->queue_state
.cp_hqd_pq_control
|=
3858 (drm_order(RADEON_GPU_PAGE_SIZE
/8) << 8);
3860 mqd
->queue_state
.cp_hqd_pq_control
|= BUF_SWAP_32BIT
;
3862 mqd
->queue_state
.cp_hqd_pq_control
&=
3863 ~(UNORD_DISPATCH
| ROQ_PQ_IB_FLIP
| PQ_VOLATILE
);
3864 mqd
->queue_state
.cp_hqd_pq_control
|=
3865 PRIV_STATE
| KMD_QUEUE
; /* assuming kernel queue control */
3866 WREG32(CP_HQD_PQ_CONTROL
, mqd
->queue_state
.cp_hqd_pq_control
);
3868 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3870 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ CIK_WB_CP1_WPTR_OFFSET
;
3872 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ CIK_WB_CP2_WPTR_OFFSET
;
3873 mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr
= wb_gpu_addr
& 0xfffffffc;
3874 mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr_hi
= upper_32_bits(wb_gpu_addr
) & 0xffff;
3875 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR
, mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr
);
3876 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI
,
3877 mqd
->queue_state
.cp_hqd_pq_wptr_poll_addr_hi
);
3879 /* set the wb address wether it's enabled or not */
3881 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ RADEON_WB_CP1_RPTR_OFFSET
;
3883 wb_gpu_addr
= rdev
->wb
.gpu_addr
+ RADEON_WB_CP2_RPTR_OFFSET
;
3884 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr
= wb_gpu_addr
& 0xfffffffc;
3885 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr_hi
=
3886 upper_32_bits(wb_gpu_addr
) & 0xffff;
3887 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR
,
3888 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr
);
3889 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI
,
3890 mqd
->queue_state
.cp_hqd_pq_rptr_report_addr_hi
);
3892 /* enable the doorbell if requested */
3894 mqd
->queue_state
.cp_hqd_pq_doorbell_control
=
3895 RREG32(CP_HQD_PQ_DOORBELL_CONTROL
);
3896 mqd
->queue_state
.cp_hqd_pq_doorbell_control
&= ~DOORBELL_OFFSET_MASK
;
3897 mqd
->queue_state
.cp_hqd_pq_doorbell_control
|=
3898 DOORBELL_OFFSET(rdev
->ring
[idx
].doorbell_offset
/ 4);
3899 mqd
->queue_state
.cp_hqd_pq_doorbell_control
|= DOORBELL_EN
;
3900 mqd
->queue_state
.cp_hqd_pq_doorbell_control
&=
3901 ~(DOORBELL_SOURCE
| DOORBELL_HIT
);
3904 mqd
->queue_state
.cp_hqd_pq_doorbell_control
= 0;
3906 WREG32(CP_HQD_PQ_DOORBELL_CONTROL
,
3907 mqd
->queue_state
.cp_hqd_pq_doorbell_control
);
3909 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3910 rdev
->ring
[idx
].wptr
= 0;
3911 mqd
->queue_state
.cp_hqd_pq_wptr
= rdev
->ring
[idx
].wptr
;
3912 WREG32(CP_HQD_PQ_WPTR
, mqd
->queue_state
.cp_hqd_pq_wptr
);
3913 rdev
->ring
[idx
].rptr
= RREG32(CP_HQD_PQ_RPTR
);
3914 mqd
->queue_state
.cp_hqd_pq_rptr
= rdev
->ring
[idx
].rptr
;
3916 /* set the vmid for the queue */
3917 mqd
->queue_state
.cp_hqd_vmid
= 0;
3918 WREG32(CP_HQD_VMID
, mqd
->queue_state
.cp_hqd_vmid
);
3920 /* activate the queue */
3921 mqd
->queue_state
.cp_hqd_active
= 1;
3922 WREG32(CP_HQD_ACTIVE
, mqd
->queue_state
.cp_hqd_active
);
3924 cik_srbm_select(rdev
, 0, 0, 0, 0);
3925 mutex_unlock(&rdev
->srbm_mutex
);
3927 radeon_bo_kunmap(rdev
->ring
[idx
].mqd_obj
);
3928 radeon_bo_unreserve(rdev
->ring
[idx
].mqd_obj
);
3930 rdev
->ring
[idx
].ready
= true;
3931 r
= radeon_ring_test(rdev
, idx
, &rdev
->ring
[idx
]);
3933 rdev
->ring
[idx
].ready
= false;
3939 static void cik_cp_enable(struct radeon_device
*rdev
, bool enable
)
3941 cik_cp_gfx_enable(rdev
, enable
);
3942 cik_cp_compute_enable(rdev
, enable
);
3945 static int cik_cp_load_microcode(struct radeon_device
*rdev
)
3949 r
= cik_cp_gfx_load_microcode(rdev
);
3952 r
= cik_cp_compute_load_microcode(rdev
);
3959 static void cik_cp_fini(struct radeon_device
*rdev
)
3961 cik_cp_gfx_fini(rdev
);
3962 cik_cp_compute_fini(rdev
);
3965 static int cik_cp_resume(struct radeon_device
*rdev
)
3969 /* Reset all cp blocks */
3970 WREG32(GRBM_SOFT_RESET
, SOFT_RESET_CP
);
3971 RREG32(GRBM_SOFT_RESET
);
3973 WREG32(GRBM_SOFT_RESET
, 0);
3974 RREG32(GRBM_SOFT_RESET
);
3976 r
= cik_cp_load_microcode(rdev
);
3980 r
= cik_cp_gfx_resume(rdev
);
3983 r
= cik_cp_compute_resume(rdev
);
/*
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
4007 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
4009 * @rdev: radeon_device pointer
4010 * @ib: IB object to schedule
4012 * Schedule an IB in the DMA ring (CIK).
4014 void cik_sdma_ring_ib_execute(struct radeon_device
*rdev
,
4015 struct radeon_ib
*ib
)
4017 struct radeon_ring
*ring
= &rdev
->ring
[ib
->ring
];
4018 u32 extra_bits
= (ib
->vm
? ib
->vm
->id
: 0) & 0xf;
4020 if (rdev
->wb
.enabled
) {
4021 u32 next_rptr
= ring
->wptr
+ 5;
4022 while ((next_rptr
& 7) != 4)
4025 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_WRITE
, SDMA_WRITE_SUB_OPCODE_LINEAR
, 0));
4026 radeon_ring_write(ring
, ring
->next_rptr_gpu_addr
& 0xfffffffc);
4027 radeon_ring_write(ring
, upper_32_bits(ring
->next_rptr_gpu_addr
) & 0xffffffff);
4028 radeon_ring_write(ring
, 1); /* number of DWs to follow */
4029 radeon_ring_write(ring
, next_rptr
);
4032 /* IB packet must end on a 8 DW boundary */
4033 while ((ring
->wptr
& 7) != 4)
4034 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0));
4035 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER
, 0, extra_bits
));
4036 radeon_ring_write(ring
, ib
->gpu_addr
& 0xffffffe0); /* base must be 32 byte aligned */
4037 radeon_ring_write(ring
, upper_32_bits(ib
->gpu_addr
) & 0xffffffff);
4038 radeon_ring_write(ring
, ib
->length_dw
);
4043 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
4045 * @rdev: radeon_device pointer
4046 * @fence: radeon fence object
4048 * Add a DMA fence packet to the ring to write
4049 * the fence seq number and DMA trap packet to generate
4050 * an interrupt if needed (CIK).
4052 void cik_sdma_fence_ring_emit(struct radeon_device
*rdev
,
4053 struct radeon_fence
*fence
)
4055 struct radeon_ring
*ring
= &rdev
->ring
[fence
->ring
];
4056 u64 addr
= rdev
->fence_drv
[fence
->ring
].gpu_addr
;
4057 u32 extra_bits
= (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4058 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4061 if (fence
->ring
== R600_RING_TYPE_DMA_INDEX
)
4062 ref_and_mask
= SDMA0
;
4064 ref_and_mask
= SDMA1
;
4066 /* write the fence */
4067 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_FENCE
, 0, 0));
4068 radeon_ring_write(ring
, addr
& 0xffffffff);
4069 radeon_ring_write(ring
, upper_32_bits(addr
) & 0xffffffff);
4070 radeon_ring_write(ring
, fence
->seq
);
4071 /* generate an interrupt */
4072 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_TRAP
, 0, 0));
4074 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM
, 0, extra_bits
));
4075 radeon_ring_write(ring
, GPU_HDP_FLUSH_DONE
);
4076 radeon_ring_write(ring
, GPU_HDP_FLUSH_REQ
);
4077 radeon_ring_write(ring
, ref_and_mask
); /* REFERENCE */
4078 radeon_ring_write(ring
, ref_and_mask
); /* MASK */
4079 radeon_ring_write(ring
, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4083 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4085 * @rdev: radeon_device pointer
4086 * @ring: radeon_ring structure holding ring information
4087 * @semaphore: radeon semaphore object
4088 * @emit_wait: wait or signal semaphore
4090 * Add a DMA semaphore packet to the ring wait on or signal
4091 * other rings (CIK).
4093 void cik_sdma_semaphore_ring_emit(struct radeon_device
*rdev
,
4094 struct radeon_ring
*ring
,
4095 struct radeon_semaphore
*semaphore
,
4098 u64 addr
= semaphore
->gpu_addr
;
4099 u32 extra_bits
= emit_wait
? 0 : SDMA_SEMAPHORE_EXTRA_S
;
4101 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE
, 0, extra_bits
));
4102 radeon_ring_write(ring
, addr
& 0xfffffff8);
4103 radeon_ring_write(ring
, upper_32_bits(addr
) & 0xffffffff);
4107 * cik_sdma_gfx_stop - stop the gfx async dma engines
4109 * @rdev: radeon_device pointer
4111 * Stop the gfx async dma ring buffers (CIK).
4113 static void cik_sdma_gfx_stop(struct radeon_device
*rdev
)
4115 u32 rb_cntl
, reg_offset
;
4118 radeon_ttm_set_active_vram_size(rdev
, rdev
->mc
.visible_vram_size
);
4120 for (i
= 0; i
< 2; i
++) {
4122 reg_offset
= SDMA0_REGISTER_OFFSET
;
4124 reg_offset
= SDMA1_REGISTER_OFFSET
;
4125 rb_cntl
= RREG32(SDMA0_GFX_RB_CNTL
+ reg_offset
);
4126 rb_cntl
&= ~SDMA_RB_ENABLE
;
4127 WREG32(SDMA0_GFX_RB_CNTL
+ reg_offset
, rb_cntl
);
4128 WREG32(SDMA0_GFX_IB_CNTL
+ reg_offset
, 0);
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}
/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
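/*
 * Worked example (editorial, not from the original source): assuming
 * drm_order() returns the base-2 logarithm of its argument, a 256 KiB ring
 * holds 65536 dwords, so rb_bufsz is 16 and the initial rb_cntl value is
 * 16 << 1 = 0x20 before SDMA_RPTR_WRITEBACK_ENABLE and SDMA_RB_ENABLE are
 * OR'd in.
 */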
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}
/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}
/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}
/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
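/*
 * Editor's note (not in the original source): the radeon_ring_lock() budget
 * of num_loops * 7 + 14 dwords appears to account for the 7-dword linear
 * copy packet emitted per loop iteration, plus up to 14 extra dwords for
 * the optional 3-dword semaphore sync and the fence/trap/HDP-flush
 * sequence written by cik_sdma_fence_ring_emit().
 */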
/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}
/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture */
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB on si */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
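/*
 * Worked example (editorial, not from the original source): a board that
 * reports 8 memory channels with a 32-bit channel size ends up with
 * rdev->mc.vram_width = 8 * 32 = 256 bits.
 */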
/*
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}

/*
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_vm_init - cik vm init callback
 *
 * @rdev: radeon_device pointer
 *
 * Inits cik specific vm parameters (number of VMs, base of vram for
 * VMIDs 1-15) (CIK).
 * Returns 0 for success.
 */
int cik_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	if (rdev->flags & RADEON_IS_IGP) {
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
		tmp <<= 22;
		rdev->vm_manager.vram_base_offset = tmp;
	} else {
		rdev->vm_manager.vram_base_offset = 0;
	}
	return 0;
}

/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
/**
 * cik_vm_decode_fault - print human readable fault info
 *
 * @rdev: radeon_device pointer
 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 *
 * Print human readable fault information (CIK).
 */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	char *block = (char *)&mc_client;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_id);
}
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * situation.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* compute doesn't have PFP */
	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
/**
 * cik_vm_set_page - update the page tables using sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}
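/*
 * Editor's note (not in the original source): the NOP padding loop above
 * keeps ib->length_dw a multiple of 8 dwords, the same 8-DW alignment the
 * ring code enforces in cik_sdma_ring_ib_execute() ("IB packet must end on
 * a 8 DW boundary").
 */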
/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}
/*
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable)
{
	u32 tmp = RREG32(CP_INT_CNTL_RING0);

	if (enable)
		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	else
		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
}

static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
{
	u32 tmp;

	tmp = RREG32(RLC_LB_CNTL);
	if (enable)
		tmp |= LOAD_BALANCE_ENABLE;
	else
		tmp &= ~LOAD_BALANCE_ENABLE;
	WREG32(RLC_LB_CNTL, tmp);
}

static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}

static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
{
	u32 tmp;

	tmp = RREG32(RLC_CNTL);
	if (tmp != rlc)
		WREG32(RLC_CNTL, rlc);
}

static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}

void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp;

	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);
}
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	cik_wait_for_rlc_serdes(rdev);
}

/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	udelay(50);
}

/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
5773 static void cik_enable_cgcg(struct radeon_device
*rdev
, bool enable
)
5775 u32 data
, orig
, tmp
, tmp2
;
5777 orig
= data
= RREG32(RLC_CGCG_CGLS_CTRL
);
5779 cik_enable_gui_idle_interrupt(rdev
, enable
);
5782 tmp
= cik_halt_rlc(rdev
);
5784 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5785 WREG32(RLC_SERDES_WR_CU_MASTER_MASK
, 0xffffffff);
5786 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK
, 0xffffffff);
5787 tmp2
= BPM_ADDR_MASK
| CGCG_OVERRIDE_0
| CGLS_ENABLE
;
5788 WREG32(RLC_SERDES_WR_CTRL
, tmp2
);
5790 cik_update_rlc(rdev
, tmp
);
5792 data
|= CGCG_EN
| CGLS_EN
;
5794 RREG32(CB_CGTT_SCLK_CTRL
);
5795 RREG32(CB_CGTT_SCLK_CTRL
);
5796 RREG32(CB_CGTT_SCLK_CTRL
);
5797 RREG32(CB_CGTT_SCLK_CTRL
);
5799 data
&= ~(CGCG_EN
| CGLS_EN
);
5803 WREG32(RLC_CGCG_CGLS_CTRL
, data
);
5807 static void cik_enable_mgcg(struct radeon_device
*rdev
, bool enable
)
5809 u32 data
, orig
, tmp
= 0;
5812 orig
= data
= RREG32(CP_MEM_SLP_CNTL
);
5813 data
|= CP_MEM_LS_EN
;
5815 WREG32(CP_MEM_SLP_CNTL
, data
);
5817 orig
= data
= RREG32(RLC_CGTT_MGCG_OVERRIDE
);
5820 WREG32(RLC_CGTT_MGCG_OVERRIDE
, data
);
5822 tmp
= cik_halt_rlc(rdev
);
5824 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5825 WREG32(RLC_SERDES_WR_CU_MASTER_MASK
, 0xffffffff);
5826 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK
, 0xffffffff);
5827 data
= BPM_ADDR_MASK
| MGCG_OVERRIDE_0
;
5828 WREG32(RLC_SERDES_WR_CTRL
, data
);
5830 cik_update_rlc(rdev
, tmp
);
5832 orig
= data
= RREG32(CGTS_SM_CTRL_REG
);
5833 data
&= ~SM_MODE_MASK
;
5834 data
|= SM_MODE(0x2);
5835 data
|= SM_MODE_ENABLE
;
5836 data
&= ~CGTS_OVERRIDE
;
5837 data
&= ~CGTS_LS_OVERRIDE
;
5838 data
&= ~ON_MONITOR_ADD_MASK
;
5839 data
|= ON_MONITOR_ADD_EN
;
5840 data
|= ON_MONITOR_ADD(0x96);
5842 WREG32(CGTS_SM_CTRL_REG
, data
);
5844 orig
= data
= RREG32(RLC_CGTT_MGCG_OVERRIDE
);
5847 WREG32(RLC_CGTT_MGCG_OVERRIDE
, data
);
5849 data
= RREG32(RLC_MEM_SLP_CNTL
);
5850 if (data
& RLC_MEM_LS_EN
) {
5851 data
&= ~RLC_MEM_LS_EN
;
5852 WREG32(RLC_MEM_SLP_CNTL
, data
);
5855 data
= RREG32(CP_MEM_SLP_CNTL
);
5856 if (data
& CP_MEM_LS_EN
) {
5857 data
&= ~CP_MEM_LS_EN
;
5858 WREG32(CP_MEM_SLP_CNTL
, data
);
5861 orig
= data
= RREG32(CGTS_SM_CTRL_REG
);
5862 data
|= CGTS_OVERRIDE
| CGTS_LS_OVERRIDE
;
5864 WREG32(CGTS_SM_CTRL_REG
, data
);
5866 tmp
= cik_halt_rlc(rdev
);
5868 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5869 WREG32(RLC_SERDES_WR_CU_MASTER_MASK
, 0xffffffff);
5870 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK
, 0xffffffff);
5871 data
= BPM_ADDR_MASK
| MGCG_OVERRIDE_1
;
5872 WREG32(RLC_SERDES_WR_CTRL
, data
);
5874 cik_update_rlc(rdev
, tmp
);
5878 static const u32 mc_cg_registers
[] =
5891 static void cik_enable_mc_ls(struct radeon_device
*rdev
,
5897 for (i
= 0; i
< ARRAY_SIZE(mc_cg_registers
); i
++) {
5898 orig
= data
= RREG32(mc_cg_registers
[i
]);
5900 data
|= MC_LS_ENABLE
;
5902 data
&= ~MC_LS_ENABLE
;
5904 WREG32(mc_cg_registers
[i
], data
);
5908 static void cik_enable_mc_mgcg(struct radeon_device
*rdev
,
5914 for (i
= 0; i
< ARRAY_SIZE(mc_cg_registers
); i
++) {
5915 orig
= data
= RREG32(mc_cg_registers
[i
]);
5917 data
|= MC_CG_ENABLE
;
5919 data
&= ~MC_CG_ENABLE
;
5921 WREG32(mc_cg_registers
[i
], data
);
5925 static void cik_enable_sdma_mgcg(struct radeon_device
*rdev
,
5931 WREG32(SDMA0_CLK_CTRL
+ SDMA0_REGISTER_OFFSET
, 0x00000100);
5932 WREG32(SDMA0_CLK_CTRL
+ SDMA1_REGISTER_OFFSET
, 0x00000100);
5934 orig
= data
= RREG32(SDMA0_CLK_CTRL
+ SDMA0_REGISTER_OFFSET
);
5937 WREG32(SDMA0_CLK_CTRL
+ SDMA0_REGISTER_OFFSET
, data
);
5939 orig
= data
= RREG32(SDMA0_CLK_CTRL
+ SDMA1_REGISTER_OFFSET
);
5942 WREG32(SDMA0_CLK_CTRL
+ SDMA1_REGISTER_OFFSET
, data
);
5946 static void cik_enable_sdma_mgls(struct radeon_device
*rdev
,
5952 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
);
5955 WREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
, data
);
5957 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
);
5960 WREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
, data
);
5962 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
);
5965 WREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
, data
);
5967 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
);
5970 WREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
, data
);
5974 static void cik_enable_uvd_mgcg(struct radeon_device
*rdev
,
5980 data
= RREG32_UVD_CTX(UVD_CGC_MEM_CTRL
);
5982 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL
, data
);
5984 orig
= data
= RREG32(UVD_CGC_CTRL
);
5987 WREG32(UVD_CGC_CTRL
, data
);
5989 data
= RREG32_UVD_CTX(UVD_CGC_MEM_CTRL
);
5991 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL
, data
);
5993 orig
= data
= RREG32(UVD_CGC_CTRL
);
5996 WREG32(UVD_CGC_CTRL
, data
);
6000 static void cik_enable_hdp_mgcg(struct radeon_device
*rdev
,
6005 orig
= data
= RREG32(HDP_HOST_PATH_CNTL
);
6008 data
&= ~CLOCK_GATING_DIS
;
6010 data
|= CLOCK_GATING_DIS
;
6013 WREG32(HDP_HOST_PATH_CNTL
, data
);
6016 static void cik_enable_hdp_ls(struct radeon_device
*rdev
,
6021 orig
= data
= RREG32(HDP_MEM_POWER_LS
);
6024 data
|= HDP_LS_ENABLE
;
6026 data
&= ~HDP_LS_ENABLE
;
6029 WREG32(HDP_MEM_POWER_LS
, data
);
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
	}

	if (block & RADEON_CG_BLOCK_MC) {
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}
}

static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6083 static void cik_enable_sck_slowdown_on_pu(struct radeon_device
*rdev
,
6088 orig
= data
= RREG32(RLC_PG_CNTL
);
6090 data
|= SMU_CLK_SLOWDOWN_ON_PU_ENABLE
;
6092 data
&= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE
;
6094 WREG32(RLC_PG_CNTL
, data
);
6097 static void cik_enable_sck_slowdown_on_pd(struct radeon_device
*rdev
,
6102 orig
= data
= RREG32(RLC_PG_CNTL
);
6104 data
|= SMU_CLK_SLOWDOWN_ON_PD_ENABLE
;
6106 data
&= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE
;
6108 WREG32(RLC_PG_CNTL
, data
);
6111 static void cik_enable_cp_pg(struct radeon_device
*rdev
, bool enable
)
6115 orig
= data
= RREG32(RLC_PG_CNTL
);
6117 data
&= ~DISABLE_CP_PG
;
6119 data
|= DISABLE_CP_PG
;
6121 WREG32(RLC_PG_CNTL
, data
);
6124 static void cik_enable_gds_pg(struct radeon_device
*rdev
, bool enable
)
6128 orig
= data
= RREG32(RLC_PG_CNTL
);
6130 data
&= ~DISABLE_GDS_PG
;
6132 data
|= DISABLE_GDS_PG
;
6134 WREG32(RLC_PG_CNTL
, data
);
6137 #define CP_ME_TABLE_SIZE 96
6138 #define CP_ME_TABLE_OFFSET 2048
6139 #define CP_MEC_TABLE_OFFSET 4096
6141 void cik_init_cp_pg_table(struct radeon_device
*rdev
)
6143 const __be32
*fw_data
;
6144 volatile u32
*dst_ptr
;
6145 int me
, i
, max_me
= 4;
6149 if (rdev
->family
== CHIP_KAVERI
)
6152 if (rdev
->rlc
.cp_table_ptr
== NULL
)
6155 /* write the cp table buffer */
6156 dst_ptr
= rdev
->rlc
.cp_table_ptr
;
6157 for (me
= 0; me
< max_me
; me
++) {
6159 fw_data
= (const __be32
*)rdev
->ce_fw
->data
;
6160 table_offset
= CP_ME_TABLE_OFFSET
;
6161 } else if (me
== 1) {
6162 fw_data
= (const __be32
*)rdev
->pfp_fw
->data
;
6163 table_offset
= CP_ME_TABLE_OFFSET
;
6164 } else if (me
== 2) {
6165 fw_data
= (const __be32
*)rdev
->me_fw
->data
;
6166 table_offset
= CP_ME_TABLE_OFFSET
;
6168 fw_data
= (const __be32
*)rdev
->mec_fw
->data
;
6169 table_offset
= CP_MEC_TABLE_OFFSET
;
6172 for (i
= 0; i
< CP_ME_TABLE_SIZE
; i
++) {
6173 dst_ptr
[bo_offset
+ i
] = be32_to_cpu(fw_data
[table_offset
+ i
]);
6175 bo_offset
+= CP_ME_TABLE_SIZE
;
6179 static void cik_enable_gfx_cgpg(struct radeon_device
*rdev
,
6185 orig
= data
= RREG32(RLC_PG_CNTL
);
6186 data
|= GFX_PG_ENABLE
;
6188 WREG32(RLC_PG_CNTL
, data
);
6190 orig
= data
= RREG32(RLC_AUTO_PG_CTRL
);
6193 WREG32(RLC_AUTO_PG_CTRL
, data
);
6195 orig
= data
= RREG32(RLC_PG_CNTL
);
6196 data
&= ~GFX_PG_ENABLE
;
6198 WREG32(RLC_PG_CNTL
, data
);
6200 orig
= data
= RREG32(RLC_AUTO_PG_CTRL
);
6201 data
&= ~AUTO_PG_EN
;
6203 WREG32(RLC_AUTO_PG_CTRL
, data
);
6205 data
= RREG32(DB_RENDER_CONTROL
);
6209 static u32
cik_get_cu_active_bitmap(struct radeon_device
*rdev
, u32 se
, u32 sh
)
6211 u32 mask
= 0, tmp
, tmp1
;
6214 cik_select_se_sh(rdev
, se
, sh
);
6215 tmp
= RREG32(CC_GC_SHADER_ARRAY_CONFIG
);
6216 tmp1
= RREG32(GC_USER_SHADER_ARRAY_CONFIG
);
6217 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
6224 for (i
= 0; i
< rdev
->config
.cik
.max_cu_per_sh
; i
++) {
6229 return (~tmp
) & mask
;
6232 static void cik_init_ao_cu_mask(struct radeon_device
*rdev
)
6234 u32 i
, j
, k
, active_cu_number
= 0;
6235 u32 mask
, counter
, cu_bitmap
;
6238 for (i
= 0; i
< rdev
->config
.cik
.max_shader_engines
; i
++) {
6239 for (j
= 0; j
< rdev
->config
.cik
.max_sh_per_se
; j
++) {
6243 for (k
= 0; k
< rdev
->config
.cik
.max_cu_per_sh
; k
++) {
6244 if (cik_get_cu_active_bitmap(rdev
, i
, j
) & mask
) {
6252 active_cu_number
+= counter
;
6253 tmp
|= (cu_bitmap
<< (i
* 16 + j
* 8));
6257 WREG32(RLC_PG_AO_CU_MASK
, tmp
);
6259 tmp
= RREG32(RLC_MAX_PG_CU
);
6260 tmp
&= ~MAX_PU_CU_MASK
;
6261 tmp
|= MAX_PU_CU(active_cu_number
);
6262 WREG32(RLC_MAX_PG_CU
, tmp
);
6265 static void cik_enable_gfx_static_mgpg(struct radeon_device
*rdev
,
6270 orig
= data
= RREG32(RLC_PG_CNTL
);
6272 data
|= STATIC_PER_CU_PG_ENABLE
;
6274 data
&= ~STATIC_PER_CU_PG_ENABLE
;
6276 WREG32(RLC_PG_CNTL
, data
);
6279 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device
*rdev
,
6284 orig
= data
= RREG32(RLC_PG_CNTL
);
6286 data
|= DYN_PER_CU_PG_ENABLE
;
6288 data
&= ~DYN_PER_CU_PG_ENABLE
;
6290 WREG32(RLC_PG_CNTL
, data
);
6293 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6294 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6296 static void cik_init_gfx_cgpg(struct radeon_device
*rdev
)
6301 if (rdev
->rlc
.cs_data
) {
6302 WREG32(RLC_GPM_SCRATCH_ADDR
, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET
);
6303 WREG32(RLC_GPM_SCRATCH_DATA
, upper_32_bits(rdev
->rlc
.clear_state_gpu_addr
));
6304 WREG32(RLC_GPM_SCRATCH_DATA
, rdev
->rlc
.clear_state_gpu_addr
);
6305 WREG32(RLC_GPM_SCRATCH_DATA
, rdev
->rlc
.clear_state_size
);
6307 WREG32(RLC_GPM_SCRATCH_ADDR
, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET
);
6308 for (i
= 0; i
< 3; i
++)
6309 WREG32(RLC_GPM_SCRATCH_DATA
, 0);
6311 if (rdev
->rlc
.reg_list
) {
6312 WREG32(RLC_GPM_SCRATCH_ADDR
, RLC_SAVE_AND_RESTORE_STARTING_OFFSET
);
6313 for (i
= 0; i
< rdev
->rlc
.reg_list_size
; i
++)
6314 WREG32(RLC_GPM_SCRATCH_DATA
, rdev
->rlc
.reg_list
[i
]);
6317 orig
= data
= RREG32(RLC_PG_CNTL
);
6320 WREG32(RLC_PG_CNTL
, data
);
6322 WREG32(RLC_SAVE_AND_RESTORE_BASE
, rdev
->rlc
.save_restore_gpu_addr
>> 8);
6323 WREG32(RLC_CP_TABLE_RESTORE
, rdev
->rlc
.cp_table_gpu_addr
>> 8);
6325 data
= RREG32(CP_RB_WPTR_POLL_CNTL
);
6326 data
&= ~IDLE_POLL_COUNT_MASK
;
6327 data
|= IDLE_POLL_COUNT(0x60);
6328 WREG32(CP_RB_WPTR_POLL_CNTL
, data
);
6331 WREG32(RLC_PG_DELAY
, data
);
6333 data
= RREG32(RLC_PG_DELAY_2
);
6336 WREG32(RLC_PG_DELAY_2
, data
);
6338 data
= RREG32(RLC_AUTO_PG_CTRL
);
6339 data
&= ~GRBM_REG_SGIT_MASK
;
6340 data
|= GRBM_REG_SGIT(0x700);
6341 WREG32(RLC_AUTO_PG_CTRL
, data
);
6345 static void cik_update_gfx_pg(struct radeon_device
*rdev
, bool enable
)
6347 bool has_pg
= false;
6348 bool has_dyn_mgpg
= false;
6349 bool has_static_mgpg
= false;
6351 /* only APUs have PG */
6352 if (rdev
->flags
& RADEON_IS_IGP
) {
6354 has_static_mgpg
= true;
6355 if (rdev
->family
== CHIP_KAVERI
)
6356 has_dyn_mgpg
= true;
6360 cik_enable_gfx_cgpg(rdev
, enable
);
6362 cik_enable_gfx_static_mgpg(rdev
, has_static_mgpg
);
6363 cik_enable_gfx_dynamic_mgpg(rdev
, has_dyn_mgpg
);
6365 cik_enable_gfx_static_mgpg(rdev
, false);
6366 cik_enable_gfx_dynamic_mgpg(rdev
, false);
6372 void cik_init_pg(struct radeon_device
*rdev
)
6374 bool has_pg
= false;
6376 /* only APUs have PG */
6377 if (rdev
->flags
& RADEON_IS_IGP
) {
6378 /* XXX disable this for now */
6379 /* has_pg = true; */
6383 cik_enable_sck_slowdown_on_pu(rdev
, true);
6384 cik_enable_sck_slowdown_on_pd(rdev
, true);
6385 cik_init_gfx_cgpg(rdev
);
6386 cik_enable_cp_pg(rdev
, true);
6387 cik_enable_gds_pg(rdev
, true);
6388 cik_init_ao_cu_mask(rdev
);
6389 cik_update_gfx_pg(rdev
, true);
/*
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written.  When the
 * pointers are equal, the ring is idle.  When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching commands and processing them until the pointers are
 * equal again at which point it updates the rptr.
 */
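/*
 * Illustrative sketch (editorial, not part of the driver): a generic
 * consumer of such an IH ring could be structured as below.  The helpers
 * ih_read_wptr(), ih_handle_vector() and ih_write_rptr() are hypothetical
 * stand-ins for the driver's real routines.
 *
 *	u32 rptr = ih->rptr;
 *	u32 wptr = ih_read_wptr(rdev);
 *
 *	while (rptr != wptr) {
 *		ih_handle_vector(rdev, rptr);      // decode one vector
 *		rptr = (rptr + 16) & ih->ptr_mask; // vectors are 16 bytes
 *	}
 *	ih_write_rptr(rdev, rptr);                 // tell the GPU we caught up
 */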
/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}

/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
}
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6593 * cik_irq_set - enable/disable interrupt sources
6595 * @rdev: radeon_device pointer
6597 * Enable interrupt sources on the GPU (vblanks, hpd,
6599 * Returns 0 for success, errors for failure.
6601 int cik_irq_set(struct radeon_device
*rdev
)
6603 u32 cp_int_cntl
= CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
|
6604 PRIV_INSTR_INT_ENABLE
| PRIV_REG_INT_ENABLE
;
6605 u32 cp_m1p0
, cp_m1p1
, cp_m1p2
, cp_m1p3
;
6606 u32 cp_m2p0
, cp_m2p1
, cp_m2p2
, cp_m2p3
;
6607 u32 crtc1
= 0, crtc2
= 0, crtc3
= 0, crtc4
= 0, crtc5
= 0, crtc6
= 0;
6608 u32 hpd1
, hpd2
, hpd3
, hpd4
, hpd5
, hpd6
;
6609 u32 grbm_int_cntl
= 0;
6610 u32 dma_cntl
, dma_cntl1
;
6613 if (!rdev
->irq
.installed
) {
6614 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6617 /* don't enable anything if the ih is disabled */
6618 if (!rdev
->ih
.enabled
) {
6619 cik_disable_interrupts(rdev
);
6620 /* force the active interrupt state to all disabled */
6621 cik_disable_interrupt_state(rdev
);
6625 hpd1
= RREG32(DC_HPD1_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
6626 hpd2
= RREG32(DC_HPD2_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
6627 hpd3
= RREG32(DC_HPD3_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
6628 hpd4
= RREG32(DC_HPD4_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
6629 hpd5
= RREG32(DC_HPD5_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
6630 hpd6
= RREG32(DC_HPD6_INT_CONTROL
) & ~DC_HPDx_INT_EN
;
6632 dma_cntl
= RREG32(SDMA0_CNTL
+ SDMA0_REGISTER_OFFSET
) & ~TRAP_ENABLE
;
6633 dma_cntl1
= RREG32(SDMA0_CNTL
+ SDMA1_REGISTER_OFFSET
) & ~TRAP_ENABLE
;
6635 cp_m1p0
= RREG32(CP_ME1_PIPE0_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6636 cp_m1p1
= RREG32(CP_ME1_PIPE1_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6637 cp_m1p2
= RREG32(CP_ME1_PIPE2_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6638 cp_m1p3
= RREG32(CP_ME1_PIPE3_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6639 cp_m2p0
= RREG32(CP_ME2_PIPE0_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6640 cp_m2p1
= RREG32(CP_ME2_PIPE1_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6641 cp_m2p2
= RREG32(CP_ME2_PIPE2_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6642 cp_m2p3
= RREG32(CP_ME2_PIPE3_INT_CNTL
) & ~TIME_STAMP_INT_ENABLE
;
6644 if (rdev
->flags
& RADEON_IS_IGP
)
6645 thermal_int
= RREG32_SMC(CG_THERMAL_INT_CTRL
) &
6646 ~(THERM_INTH_MASK
| THERM_INTL_MASK
);
6648 thermal_int
= RREG32_SMC(CG_THERMAL_INT
) &
6649 ~(THERM_INT_MASK_HIGH
| THERM_INT_MASK_LOW
);
6651 /* enable CP interrupts on all rings */
6652 if (atomic_read(&rdev
->irq
.ring_int
[RADEON_RING_TYPE_GFX_INDEX
])) {
6653 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6654 cp_int_cntl
|= TIME_STAMP_INT_ENABLE
;
6656 if (atomic_read(&rdev
->irq
.ring_int
[CAYMAN_RING_TYPE_CP1_INDEX
])) {
6657 struct radeon_ring
*ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
6658 DRM_DEBUG("si_irq_set: sw int cp1\n");
6659 if (ring
->me
== 1) {
6660 switch (ring
->pipe
) {
6662 cp_m1p0
|= TIME_STAMP_INT_ENABLE
;
6665 cp_m1p1
|= TIME_STAMP_INT_ENABLE
;
6668 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
6671 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
6674 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring
->pipe
);
6677 } else if (ring
->me
== 2) {
6678 switch (ring
->pipe
) {
6680 cp_m2p0
|= TIME_STAMP_INT_ENABLE
;
6683 cp_m2p1
|= TIME_STAMP_INT_ENABLE
;
6686 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
6689 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
6692 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring
->pipe
);
6696 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring
->me
);
6699 if (atomic_read(&rdev
->irq
.ring_int
[CAYMAN_RING_TYPE_CP2_INDEX
])) {
6700 struct radeon_ring
*ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
6701 DRM_DEBUG("si_irq_set: sw int cp2\n");
6702 if (ring
->me
== 1) {
6703 switch (ring
->pipe
) {
6705 cp_m1p0
|= TIME_STAMP_INT_ENABLE
;
6708 cp_m1p1
|= TIME_STAMP_INT_ENABLE
;
6711 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
6714 cp_m1p2
|= TIME_STAMP_INT_ENABLE
;
6717 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring
->pipe
);
6720 } else if (ring
->me
== 2) {
6721 switch (ring
->pipe
) {
6723 cp_m2p0
|= TIME_STAMP_INT_ENABLE
;
6726 cp_m2p1
|= TIME_STAMP_INT_ENABLE
;
6729 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
6732 cp_m2p2
|= TIME_STAMP_INT_ENABLE
;
6735 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring
->pipe
);
6739 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring
->me
);
6743 if (atomic_read(&rdev
->irq
.ring_int
[R600_RING_TYPE_DMA_INDEX
])) {
6744 DRM_DEBUG("cik_irq_set: sw int dma\n");
6745 dma_cntl
|= TRAP_ENABLE
;
6748 if (atomic_read(&rdev
->irq
.ring_int
[CAYMAN_RING_TYPE_DMA1_INDEX
])) {
6749 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6750 dma_cntl1
|= TRAP_ENABLE
;
6753 if (rdev
->irq
.crtc_vblank_int
[0] ||
6754 atomic_read(&rdev
->irq
.pflip
[0])) {
6755 DRM_DEBUG("cik_irq_set: vblank 0\n");
6756 crtc1
|= VBLANK_INTERRUPT_MASK
;
6758 if (rdev
->irq
.crtc_vblank_int
[1] ||
6759 atomic_read(&rdev
->irq
.pflip
[1])) {
6760 DRM_DEBUG("cik_irq_set: vblank 1\n");
6761 crtc2
|= VBLANK_INTERRUPT_MASK
;
6763 if (rdev
->irq
.crtc_vblank_int
[2] ||
6764 atomic_read(&rdev
->irq
.pflip
[2])) {
6765 DRM_DEBUG("cik_irq_set: vblank 2\n");
6766 crtc3
|= VBLANK_INTERRUPT_MASK
;
6768 if (rdev
->irq
.crtc_vblank_int
[3] ||
6769 atomic_read(&rdev
->irq
.pflip
[3])) {
6770 DRM_DEBUG("cik_irq_set: vblank 3\n");
6771 crtc4
|= VBLANK_INTERRUPT_MASK
;
6773 if (rdev
->irq
.crtc_vblank_int
[4] ||
6774 atomic_read(&rdev
->irq
.pflip
[4])) {
6775 DRM_DEBUG("cik_irq_set: vblank 4\n");
6776 crtc5
|= VBLANK_INTERRUPT_MASK
;
6778 if (rdev
->irq
.crtc_vblank_int
[5] ||
6779 atomic_read(&rdev
->irq
.pflip
[5])) {
6780 DRM_DEBUG("cik_irq_set: vblank 5\n");
6781 crtc6
|= VBLANK_INTERRUPT_MASK
;
6783 if (rdev
->irq
.hpd
[0]) {
6784 DRM_DEBUG("cik_irq_set: hpd 1\n");
6785 hpd1
|= DC_HPDx_INT_EN
;
6787 if (rdev
->irq
.hpd
[1]) {
6788 DRM_DEBUG("cik_irq_set: hpd 2\n");
6789 hpd2
|= DC_HPDx_INT_EN
;
6791 if (rdev
->irq
.hpd
[2]) {
6792 DRM_DEBUG("cik_irq_set: hpd 3\n");
6793 hpd3
|= DC_HPDx_INT_EN
;
6795 if (rdev
->irq
.hpd
[3]) {
6796 DRM_DEBUG("cik_irq_set: hpd 4\n");
6797 hpd4
|= DC_HPDx_INT_EN
;
6799 if (rdev
->irq
.hpd
[4]) {
6800 DRM_DEBUG("cik_irq_set: hpd 5\n");
6801 hpd5
|= DC_HPDx_INT_EN
;
6803 if (rdev
->irq
.hpd
[5]) {
6804 DRM_DEBUG("cik_irq_set: hpd 6\n");
6805 hpd6
|= DC_HPDx_INT_EN
;
6808 if (rdev
->irq
.dpm_thermal
) {
6809 DRM_DEBUG("dpm thermal\n");
6810 if (rdev
->flags
& RADEON_IS_IGP
)
6811 thermal_int
|= THERM_INTH_MASK
| THERM_INTL_MASK
;
6813 thermal_int
|= THERM_INT_MASK_HIGH
| THERM_INT_MASK_LOW
;
6816 WREG32(CP_INT_CNTL_RING0
, cp_int_cntl
);
6818 WREG32(SDMA0_CNTL
+ SDMA0_REGISTER_OFFSET
, dma_cntl
);
6819 WREG32(SDMA0_CNTL
+ SDMA1_REGISTER_OFFSET
, dma_cntl1
);
6821 WREG32(CP_ME1_PIPE0_INT_CNTL
, cp_m1p0
);
6822 WREG32(CP_ME1_PIPE1_INT_CNTL
, cp_m1p1
);
6823 WREG32(CP_ME1_PIPE2_INT_CNTL
, cp_m1p2
);
6824 WREG32(CP_ME1_PIPE3_INT_CNTL
, cp_m1p3
);
6825 WREG32(CP_ME2_PIPE0_INT_CNTL
, cp_m2p0
);
6826 WREG32(CP_ME2_PIPE1_INT_CNTL
, cp_m2p1
);
6827 WREG32(CP_ME2_PIPE2_INT_CNTL
, cp_m2p2
);
6828 WREG32(CP_ME2_PIPE3_INT_CNTL
, cp_m2p3
);
6830 WREG32(GRBM_INT_CNTL
, grbm_int_cntl
);
6832 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, crtc1
);
6833 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, crtc2
);
6834 if (rdev
->num_crtc
>= 4) {
6835 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, crtc3
);
6836 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, crtc4
);
6838 if (rdev
->num_crtc
>= 6) {
6839 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, crtc5
);
6840 WREG32(LB_INTERRUPT_MASK
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, crtc6
);
6843 WREG32(DC_HPD1_INT_CONTROL
, hpd1
);
6844 WREG32(DC_HPD2_INT_CONTROL
, hpd2
);
6845 WREG32(DC_HPD3_INT_CONTROL
, hpd3
);
6846 WREG32(DC_HPD4_INT_CONTROL
, hpd4
);
6847 WREG32(DC_HPD5_INT_CONTROL
, hpd5
);
6848 WREG32(DC_HPD6_INT_CONTROL
, hpd6
);
6850 if (rdev
->flags
& RADEON_IS_IGP
)
6851 WREG32_SMC(CG_THERMAL_INT_CTRL
, thermal_int
);
6853 WREG32_SMC(CG_THERMAL_INT
, thermal_int
);
6859 * cik_irq_ack - ack interrupt sources
6861 * @rdev: radeon_device pointer
6863 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
6865 * generated and do not require an explicit ack.
6867 static inline void cik_irq_ack(struct radeon_device
*rdev
)
6871 rdev
->irq
.stat_regs
.cik
.disp_int
= RREG32(DISP_INTERRUPT_STATUS
);
6872 rdev
->irq
.stat_regs
.cik
.disp_int_cont
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE
);
6873 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE2
);
6874 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE3
);
6875 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE4
);
6876 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE5
);
6877 rdev
->irq
.stat_regs
.cik
.disp_int_cont6
= RREG32(DISP_INTERRUPT_STATUS_CONTINUE6
);
6879 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VBLANK_INTERRUPT
)
6880 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, VBLANK_ACK
);
6881 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VLINE_INTERRUPT
)
6882 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC0_REGISTER_OFFSET
, VLINE_ACK
);
6883 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VBLANK_INTERRUPT
)
6884 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, VBLANK_ACK
);
6885 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VLINE_INTERRUPT
)
6886 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC1_REGISTER_OFFSET
, VLINE_ACK
);
6888 if (rdev
->num_crtc
>= 4) {
6889 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VBLANK_INTERRUPT
)
6890 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, VBLANK_ACK
);
6891 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VLINE_INTERRUPT
)
6892 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC2_REGISTER_OFFSET
, VLINE_ACK
);
6893 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VBLANK_INTERRUPT
)
6894 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, VBLANK_ACK
);
6895 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VLINE_INTERRUPT
)
6896 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC3_REGISTER_OFFSET
, VLINE_ACK
);
6899 if (rdev
->num_crtc
>= 6) {
6900 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VBLANK_INTERRUPT
)
6901 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, VBLANK_ACK
);
6902 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VLINE_INTERRUPT
)
6903 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC4_REGISTER_OFFSET
, VLINE_ACK
);
6904 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VBLANK_INTERRUPT
)
6905 WREG32(LB_VBLANK_STATUS
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, VBLANK_ACK
);
6906 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VLINE_INTERRUPT
)
6907 WREG32(LB_VLINE_STATUS
+ EVERGREEN_CRTC5_REGISTER_OFFSET
, VLINE_ACK
);
6910 if (rdev
->irq
.stat_regs
.cik
.disp_int
& DC_HPD1_INTERRUPT
) {
6911 tmp
= RREG32(DC_HPD1_INT_CONTROL
);
6912 tmp
|= DC_HPDx_INT_ACK
;
6913 WREG32(DC_HPD1_INT_CONTROL
, tmp
);
6915 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& DC_HPD2_INTERRUPT
) {
6916 tmp
= RREG32(DC_HPD2_INT_CONTROL
);
6917 tmp
|= DC_HPDx_INT_ACK
;
6918 WREG32(DC_HPD2_INT_CONTROL
, tmp
);
6920 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& DC_HPD3_INTERRUPT
) {
6921 tmp
= RREG32(DC_HPD3_INT_CONTROL
);
6922 tmp
|= DC_HPDx_INT_ACK
;
6923 WREG32(DC_HPD3_INT_CONTROL
, tmp
);
6925 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& DC_HPD4_INTERRUPT
) {
6926 tmp
= RREG32(DC_HPD4_INT_CONTROL
);
6927 tmp
|= DC_HPDx_INT_ACK
;
6928 WREG32(DC_HPD4_INT_CONTROL
, tmp
);
6930 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& DC_HPD5_INTERRUPT
) {
6931 tmp
= RREG32(DC_HPD5_INT_CONTROL
);
6932 tmp
|= DC_HPDx_INT_ACK
;
6933 WREG32(DC_HPD5_INT_CONTROL
, tmp
);
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}

/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}

/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not overwritten vector (wptr + 16).  Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
/* Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
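/*
 * Illustrative sketch (not part of this driver): decoding the CP RINGID byte
 * described above into ME/PIPE/QUEUE.  The struct and helper names are
 * hypothetical; the masks and shifts mirror what cik_irq_process() does below.
 *
 *	#include <stdint.h>
 *
 *	struct cik_cp_ring_id {
 *		uint8_t me_id;		// bits [6:5]: 0 = gfx, 1/2 = compute MECs
 *		uint8_t pipe_id;	// bits [4:3]: pipe within the ME
 *		uint8_t queue_id;	// bits [2:0]: queue within the pipe
 *	};
 *
 *	static struct cik_cp_ring_id decode_cp_ring_id(uint8_t ring_id)
 *	{
 *		struct cik_cp_ring_id r;
 *
 *		r.me_id    = (ring_id & 0x60) >> 5;
 *		r.pipe_id  = (ring_id & 0x18) >> 3;
 *		r.queue_id = (ring_id & 0x07) >> 0;
 *		return r;
 *	}
 */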
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupts that require processing.
 * Returns irq process return code.
 */
7054 int cik_irq_process(struct radeon_device
*rdev
)
7056 struct radeon_ring
*cp1_ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
7057 struct radeon_ring
*cp2_ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
7060 u32 src_id
, src_data
, ring_id
;
7061 u8 me_id
, pipe_id
, queue_id
;
7063 bool queue_hotplug
= false;
7064 bool queue_reset
= false;
7065 u32 addr
, status
, mc_client
;
7066 bool queue_thermal
= false;
7068 if (!rdev
->ih
.enabled
|| rdev
->shutdown
)
7071 wptr
= cik_get_ih_wptr(rdev
);
7074 /* is somebody else already processing irqs? */
7075 if (atomic_xchg(&rdev
->ih
.lock
, 1))
7078 rptr
= rdev
->ih
.rptr
;
7079 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr
, wptr
);
7081 /* Order reading of wptr vs. reading of IH ring data */
7084 /* display interrupts */
7087 while (rptr
!= wptr
) {
7088 /* wptr/rptr are in bytes! */
7089 ring_index
= rptr
/ 4;
7090 src_id
= le32_to_cpu(rdev
->ih
.ring
[ring_index
]) & 0xff;
7091 src_data
= le32_to_cpu(rdev
->ih
.ring
[ring_index
+ 1]) & 0xfffffff;
7092 ring_id
= le32_to_cpu(rdev
->ih
.ring
[ring_index
+ 2]) & 0xff;
7095 case 1: /* D1 vblank/vline */
7097 case 0: /* D1 vblank */
7098 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VBLANK_INTERRUPT
) {
7099 if (rdev
->irq
.crtc_vblank_int
[0]) {
7100 drm_handle_vblank(rdev
->ddev
, 0);
7101 rdev
->pm
.vblank_sync
= true;
7102 wake_up(&rdev
->irq
.vblank_queue
);
7104 if (atomic_read(&rdev
->irq
.pflip
[0]))
7105 radeon_crtc_handle_flip(rdev
, 0);
7106 rdev
->irq
.stat_regs
.cik
.disp_int
&= ~LB_D1_VBLANK_INTERRUPT
;
7107 DRM_DEBUG("IH: D1 vblank\n");
7110 case 1: /* D1 vline */
7111 if (rdev
->irq
.stat_regs
.cik
.disp_int
& LB_D1_VLINE_INTERRUPT
) {
7112 rdev
->irq
.stat_regs
.cik
.disp_int
&= ~LB_D1_VLINE_INTERRUPT
;
7113 DRM_DEBUG("IH: D1 vline\n");
7117 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7121 case 2: /* D2 vblank/vline */
7123 case 0: /* D2 vblank */
7124 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VBLANK_INTERRUPT
) {
7125 if (rdev
->irq
.crtc_vblank_int
[1]) {
7126 drm_handle_vblank(rdev
->ddev
, 1);
7127 rdev
->pm
.vblank_sync
= true;
7128 wake_up(&rdev
->irq
.vblank_queue
);
7130 if (atomic_read(&rdev
->irq
.pflip
[1]))
7131 radeon_crtc_handle_flip(rdev
, 1);
7132 rdev
->irq
.stat_regs
.cik
.disp_int_cont
&= ~LB_D2_VBLANK_INTERRUPT
;
7133 DRM_DEBUG("IH: D2 vblank\n");
7136 case 1: /* D2 vline */
7137 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& LB_D2_VLINE_INTERRUPT
) {
7138 rdev
->irq
.stat_regs
.cik
.disp_int_cont
&= ~LB_D2_VLINE_INTERRUPT
;
7139 DRM_DEBUG("IH: D2 vline\n");
7143 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7147 case 3: /* D3 vblank/vline */
7149 case 0: /* D3 vblank */
7150 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VBLANK_INTERRUPT
) {
7151 if (rdev
->irq
.crtc_vblank_int
[2]) {
7152 drm_handle_vblank(rdev
->ddev
, 2);
7153 rdev
->pm
.vblank_sync
= true;
7154 wake_up(&rdev
->irq
.vblank_queue
);
7156 if (atomic_read(&rdev
->irq
.pflip
[2]))
7157 radeon_crtc_handle_flip(rdev
, 2);
7158 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
&= ~LB_D3_VBLANK_INTERRUPT
;
7159 DRM_DEBUG("IH: D3 vblank\n");
7162 case 1: /* D3 vline */
7163 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& LB_D3_VLINE_INTERRUPT
) {
7164 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
&= ~LB_D3_VLINE_INTERRUPT
;
7165 DRM_DEBUG("IH: D3 vline\n");
7169 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7173 case 4: /* D4 vblank/vline */
7175 case 0: /* D4 vblank */
7176 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VBLANK_INTERRUPT
) {
7177 if (rdev
->irq
.crtc_vblank_int
[3]) {
7178 drm_handle_vblank(rdev
->ddev
, 3);
7179 rdev
->pm
.vblank_sync
= true;
7180 wake_up(&rdev
->irq
.vblank_queue
);
7182 if (atomic_read(&rdev
->irq
.pflip
[3]))
7183 radeon_crtc_handle_flip(rdev
, 3);
7184 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
&= ~LB_D4_VBLANK_INTERRUPT
;
7185 DRM_DEBUG("IH: D4 vblank\n");
7188 case 1: /* D4 vline */
7189 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& LB_D4_VLINE_INTERRUPT
) {
7190 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
&= ~LB_D4_VLINE_INTERRUPT
;
7191 DRM_DEBUG("IH: D4 vline\n");
7195 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7199 case 5: /* D5 vblank/vline */
7201 case 0: /* D5 vblank */
7202 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VBLANK_INTERRUPT
) {
7203 if (rdev
->irq
.crtc_vblank_int
[4]) {
7204 drm_handle_vblank(rdev
->ddev
, 4);
7205 rdev
->pm
.vblank_sync
= true;
7206 wake_up(&rdev
->irq
.vblank_queue
);
7208 if (atomic_read(&rdev
->irq
.pflip
[4]))
7209 radeon_crtc_handle_flip(rdev
, 4);
7210 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
&= ~LB_D5_VBLANK_INTERRUPT
;
7211 DRM_DEBUG("IH: D5 vblank\n");
7214 case 1: /* D5 vline */
7215 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& LB_D5_VLINE_INTERRUPT
) {
7216 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
&= ~LB_D5_VLINE_INTERRUPT
;
7217 DRM_DEBUG("IH: D5 vline\n");
7221 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7225 case 6: /* D6 vblank/vline */
7227 case 0: /* D6 vblank */
7228 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VBLANK_INTERRUPT
) {
7229 if (rdev
->irq
.crtc_vblank_int
[5]) {
7230 drm_handle_vblank(rdev
->ddev
, 5);
7231 rdev
->pm
.vblank_sync
= true;
7232 wake_up(&rdev
->irq
.vblank_queue
);
7234 if (atomic_read(&rdev
->irq
.pflip
[5]))
7235 radeon_crtc_handle_flip(rdev
, 5);
7236 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
&= ~LB_D6_VBLANK_INTERRUPT
;
7237 DRM_DEBUG("IH: D6 vblank\n");
7240 case 1: /* D6 vline */
7241 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& LB_D6_VLINE_INTERRUPT
) {
7242 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
&= ~LB_D6_VLINE_INTERRUPT
;
7243 DRM_DEBUG("IH: D6 vline\n");
7247 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7251 case 42: /* HPD hotplug */
7254 if (rdev
->irq
.stat_regs
.cik
.disp_int
& DC_HPD1_INTERRUPT
) {
7255 rdev
->irq
.stat_regs
.cik
.disp_int
&= ~DC_HPD1_INTERRUPT
;
7256 queue_hotplug
= true;
7257 DRM_DEBUG("IH: HPD1\n");
7261 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont
& DC_HPD2_INTERRUPT
) {
7262 rdev
->irq
.stat_regs
.cik
.disp_int_cont
&= ~DC_HPD2_INTERRUPT
;
7263 queue_hotplug
= true;
7264 DRM_DEBUG("IH: HPD2\n");
7268 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont2
& DC_HPD3_INTERRUPT
) {
7269 rdev
->irq
.stat_regs
.cik
.disp_int_cont2
&= ~DC_HPD3_INTERRUPT
;
7270 queue_hotplug
= true;
7271 DRM_DEBUG("IH: HPD3\n");
7275 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont3
& DC_HPD4_INTERRUPT
) {
7276 rdev
->irq
.stat_regs
.cik
.disp_int_cont3
&= ~DC_HPD4_INTERRUPT
;
7277 queue_hotplug
= true;
7278 DRM_DEBUG("IH: HPD4\n");
7282 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont4
& DC_HPD5_INTERRUPT
) {
7283 rdev
->irq
.stat_regs
.cik
.disp_int_cont4
&= ~DC_HPD5_INTERRUPT
;
7284 queue_hotplug
= true;
7285 DRM_DEBUG("IH: HPD5\n");
7289 if (rdev
->irq
.stat_regs
.cik
.disp_int_cont5
& DC_HPD6_INTERRUPT
) {
7290 rdev
->irq
.stat_regs
.cik
.disp_int_cont5
&= ~DC_HPD6_INTERRUPT
;
7291 queue_hotplug
= true;
7292 DRM_DEBUG("IH: HPD6\n");
7296 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7302 addr
= RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR
);
7303 status
= RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS
);
7304 mc_client
= RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT
);
7305 dev_err(rdev
->dev
, "GPU fault detected: %d 0x%08x\n", src_id
, src_data
);
7306 dev_err(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7308 dev_err(rdev
->dev
, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7310 cik_vm_decode_fault(rdev
, status
, addr
, mc_client
);
7311 /* reset addr and status */
7312 WREG32_P(VM_CONTEXT1_CNTL2
, 1, ~1);
7314 case 176: /* GFX RB CP_INT */
7315 case 177: /* GFX IB CP_INT */
7316 radeon_fence_process(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
7318 case 181: /* CP EOP event */
7319 DRM_DEBUG("IH: CP EOP\n");
7320 /* XXX check the bitfield order! */
7321 me_id
= (ring_id
& 0x60) >> 5;
7322 pipe_id
= (ring_id
& 0x18) >> 3;
7323 queue_id
= (ring_id
& 0x7) >> 0;
7326 radeon_fence_process(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7337 case 184: /* CP Privileged reg access */
7338 DRM_ERROR("Illegal register access in command stream\n");
7339 /* XXX check the bitfield order! */
7340 me_id
= (ring_id
& 0x60) >> 5;
7341 pipe_id
= (ring_id
& 0x18) >> 3;
7342 queue_id
= (ring_id
& 0x7) >> 0;
7345 /* This results in a full GPU reset, but all we need to do is soft
7346 * reset the CP for gfx
7360 case 185: /* CP Privileged inst */
7361 DRM_ERROR("Illegal instruction in command stream\n");
7362 /* XXX check the bitfield order! */
7363 me_id
= (ring_id
& 0x60) >> 5;
7364 pipe_id
= (ring_id
& 0x18) >> 3;
7365 queue_id
= (ring_id
& 0x7) >> 0;
7368 /* This results in a full GPU reset, but all we need to do is soft
7369 * reset the CP for gfx
7383 case 224: /* SDMA trap event */
7384 /* XXX check the bitfield order! */
7385 me_id
= (ring_id
& 0x3) >> 0;
7386 queue_id
= (ring_id
& 0xc) >> 2;
7387 DRM_DEBUG("IH: SDMA trap\n");
7392 radeon_fence_process(rdev
, R600_RING_TYPE_DMA_INDEX
);
7405 radeon_fence_process(rdev
, CAYMAN_RING_TYPE_DMA1_INDEX
);
7417 case 230: /* thermal low to high */
7418 DRM_DEBUG("IH: thermal low to high\n");
7419 rdev
->pm
.dpm
.thermal
.high_to_low
= false;
7420 queue_thermal
= true;
7422 case 231: /* thermal high to low */
7423 DRM_DEBUG("IH: thermal high to low\n");
7424 rdev
->pm
.dpm
.thermal
.high_to_low
= true;
7425 queue_thermal
= true;
7427 case 233: /* GUI IDLE */
7428 DRM_DEBUG("IH: GUI idle\n");
7430 case 241: /* SDMA Privileged inst */
7431 case 247: /* SDMA Privileged inst */
7432 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7433 /* XXX check the bitfield order! */
7434 me_id
= (ring_id
& 0x3) >> 0;
7435 queue_id
= (ring_id
& 0xc) >> 2;
7470 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id
, src_data
);
7474 /* wptr/rptr are in bytes! */
7476 rptr
&= rdev
->ih
.ptr_mask
;
7479 schedule_work(&rdev
->hotplug_work
);
7481 schedule_work(&rdev
->reset_work
);
7483 schedule_work(&rdev
->pm
.dpm
.thermal
.work
);
7484 rdev
->ih
.rptr
= rptr
;
7485 WREG32(IH_RB_RPTR
, rdev
->ih
.rptr
);
7486 atomic_set(&rdev
->ih
.lock
, 0);
7488 /* make sure wptr hasn't changed while processing */
7489 wptr
= cik_get_ih_wptr(rdev
);
7497 * startup/shutdown callbacks
7500 * cik_startup - program the asic to a functional state
7502 * @rdev: radeon_device pointer
7504 * Programs the asic to a functional state (CIK).
7505 * Called by cik_init() and cik_resume().
7506 * Returns 0 for success, error for failure.
7508 static int cik_startup(struct radeon_device
*rdev
)
7510 struct radeon_ring
*ring
;
7513 /* enable pcie gen2/3 link */
7514 cik_pcie_gen3_enable(rdev
);
7516 cik_program_aspm(rdev
);
7518 cik_mc_program(rdev
);
7520 if (rdev
->flags
& RADEON_IS_IGP
) {
7521 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->ce_fw
||
7522 !rdev
->mec_fw
|| !rdev
->sdma_fw
|| !rdev
->rlc_fw
) {
7523 r
= cik_init_microcode(rdev
);
7525 DRM_ERROR("Failed to load firmware!\n");
7530 if (!rdev
->me_fw
|| !rdev
->pfp_fw
|| !rdev
->ce_fw
||
7531 !rdev
->mec_fw
|| !rdev
->sdma_fw
|| !rdev
->rlc_fw
||
7533 r
= cik_init_microcode(rdev
);
7535 DRM_ERROR("Failed to load firmware!\n");
7540 r
= ci_mc_load_microcode(rdev
);
7542 DRM_ERROR("Failed to load MC firmware!\n");
7547 r
= r600_vram_scratch_init(rdev
);
7551 r
= cik_pcie_gart_enable(rdev
);
7556 /* allocate rlc buffers */
7557 if (rdev
->flags
& RADEON_IS_IGP
) {
7558 if (rdev
->family
== CHIP_KAVERI
) {
7559 rdev
->rlc
.reg_list
= spectre_rlc_save_restore_register_list
;
7560 rdev
->rlc
.reg_list_size
=
7561 (u32
)ARRAY_SIZE(spectre_rlc_save_restore_register_list
);
7563 rdev
->rlc
.reg_list
= kalindi_rlc_save_restore_register_list
;
7564 rdev
->rlc
.reg_list_size
=
7565 (u32
)ARRAY_SIZE(kalindi_rlc_save_restore_register_list
);
7568 rdev
->rlc
.cs_data
= ci_cs_data
;
7569 rdev
->rlc
.cp_table_size
= CP_ME_TABLE_SIZE
* 5 * 4;
7570 r
= sumo_rlc_init(rdev
);
7572 DRM_ERROR("Failed to init rlc BOs!\n");
7576 /* allocate wb buffer */
7577 r
= radeon_wb_init(rdev
);
7581 /* allocate mec buffers */
7582 r
= cik_mec_init(rdev
);
7584 DRM_ERROR("Failed to init MEC BOs!\n");
7588 r
= radeon_fence_driver_start_ring(rdev
, RADEON_RING_TYPE_GFX_INDEX
);
7590 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
7594 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_CP1_INDEX
);
7596 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
7600 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_CP2_INDEX
);
7602 dev_err(rdev
->dev
, "failed initializing CP fences (%d).\n", r
);
7606 r
= radeon_fence_driver_start_ring(rdev
, R600_RING_TYPE_DMA_INDEX
);
7608 dev_err(rdev
->dev
, "failed initializing DMA fences (%d).\n", r
);
7612 r
= radeon_fence_driver_start_ring(rdev
, CAYMAN_RING_TYPE_DMA1_INDEX
);
7614 dev_err(rdev
->dev
, "failed initializing DMA fences (%d).\n", r
);
7618 r
= uvd_v4_2_resume(rdev
);
7620 r
= radeon_fence_driver_start_ring(rdev
,
7621 R600_RING_TYPE_UVD_INDEX
);
7623 dev_err(rdev
->dev
, "UVD fences init error (%d).\n", r
);
7626 rdev
->ring
[R600_RING_TYPE_UVD_INDEX
].ring_size
= 0;
7629 if (!rdev
->irq
.installed
) {
7630 r
= radeon_irq_kms_init(rdev
);
7635 r
= cik_irq_init(rdev
);
7637 DRM_ERROR("radeon: IH init failed (%d).\n", r
);
7638 radeon_irq_kms_fini(rdev
);
7643 ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
7644 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP_RPTR_OFFSET
,
7645 CP_RB0_RPTR
, CP_RB0_WPTR
,
7650 /* set up the compute queues */
7651 /* type-2 packets are deprecated on MEC, use type-3 instead */
7652 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
7653 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP1_RPTR_OFFSET
,
7654 CP_HQD_PQ_RPTR
, CP_HQD_PQ_WPTR
,
7655 PACKET3(PACKET3_NOP
, 0x3FFF));
7658 ring
->me
= 1; /* first MEC */
7659 ring
->pipe
= 0; /* first pipe */
7660 ring
->queue
= 0; /* first queue */
7661 ring
->wptr_offs
= CIK_WB_CP1_WPTR_OFFSET
;
7663 /* type-2 packets are deprecated on MEC, use type-3 instead */
7664 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
7665 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, RADEON_WB_CP2_RPTR_OFFSET
,
7666 CP_HQD_PQ_RPTR
, CP_HQD_PQ_WPTR
,
7667 PACKET3(PACKET3_NOP
, 0x3FFF));
7670 /* dGPU only have 1 MEC */
7671 ring
->me
= 1; /* first MEC */
7672 ring
->pipe
= 0; /* first pipe */
7673 ring
->queue
= 1; /* second queue */
7674 ring
->wptr_offs
= CIK_WB_CP2_WPTR_OFFSET
;
7676 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
7677 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, R600_WB_DMA_RPTR_OFFSET
,
7678 SDMA0_GFX_RB_RPTR
+ SDMA0_REGISTER_OFFSET
,
7679 SDMA0_GFX_RB_WPTR
+ SDMA0_REGISTER_OFFSET
,
7680 SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0));
7684 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
7685 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, CAYMAN_WB_DMA1_RPTR_OFFSET
,
7686 SDMA0_GFX_RB_RPTR
+ SDMA1_REGISTER_OFFSET
,
7687 SDMA0_GFX_RB_WPTR
+ SDMA1_REGISTER_OFFSET
,
7688 SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0));
7692 r
= cik_cp_resume(rdev
);
7696 r
= cik_sdma_resume(rdev
);
7700 ring
= &rdev
->ring
[R600_RING_TYPE_UVD_INDEX
];
7701 if (ring
->ring_size
) {
7702 r
= radeon_ring_init(rdev
, ring
, ring
->ring_size
, 0,
7703 UVD_RBC_RB_RPTR
, UVD_RBC_RB_WPTR
,
7706 r
= uvd_v1_0_init(rdev
);
7708 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r
);
7711 r
= radeon_ib_pool_init(rdev
);
7713 dev_err(rdev
->dev
, "IB initialization failed (%d).\n", r
);
7717 r
= radeon_vm_manager_init(rdev
);
7719 dev_err(rdev
->dev
, "vm manager initialization failed (%d).\n", r
);
/**
 * cik_resume - resume the asic
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);

	return 0;
}
7779 /* Plan is to move initialization in that function and use
7780 * helper function so that radeon_device_init pretty much
7781 * do nothing more than calling asic specific function. This
7782 * should also allow to remove a bunch of callback function
7786 * cik_init - asic specific driver and hw init
7788 * @rdev: radeon_device pointer
7790 * Setup asic specific driver variables and program the hw
7791 * to a functional state (CIK).
7792 * Called at driver startup.
7793 * Returns 0 for success, errors for failure.
7795 int cik_init(struct radeon_device
*rdev
)
7797 struct radeon_ring
*ring
;
7801 if (!radeon_get_bios(rdev
)) {
7802 if (ASIC_IS_AVIVO(rdev
))
7805 /* Must be an ATOMBIOS */
7806 if (!rdev
->is_atom_bios
) {
		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7810 r
= radeon_atombios_init(rdev
);
7814 /* Post card if necessary */
7815 if (!radeon_card_posted(rdev
)) {
7817 dev_err(rdev
->dev
, "Card not posted and no BIOS - ignoring\n");
7820 DRM_INFO("GPU not posted. posting now...\n");
7821 atom_asic_init(rdev
->mode_info
.atom_context
);
7823 /* init golden registers */
7824 cik_init_golden_registers(rdev
);
7825 /* Initialize scratch registers */
7826 cik_scratch_init(rdev
);
7827 /* Initialize surface registers */
7828 radeon_surface_init(rdev
);
7829 /* Initialize clocks */
7830 radeon_get_clock_info(rdev
->ddev
);
7833 r
= radeon_fence_driver_init(rdev
);
7837 /* initialize memory controller */
7838 r
= cik_mc_init(rdev
);
7841 /* Memory manager */
7842 r
= radeon_bo_init(rdev
);
7846 ring
= &rdev
->ring
[RADEON_RING_TYPE_GFX_INDEX
];
7847 ring
->ring_obj
= NULL
;
7848 r600_ring_init(rdev
, ring
, 1024 * 1024);
7850 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP1_INDEX
];
7851 ring
->ring_obj
= NULL
;
7852 r600_ring_init(rdev
, ring
, 1024 * 1024);
7853 r
= radeon_doorbell_get(rdev
, &ring
->doorbell_page_num
);
7857 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_CP2_INDEX
];
7858 ring
->ring_obj
= NULL
;
7859 r600_ring_init(rdev
, ring
, 1024 * 1024);
7860 r
= radeon_doorbell_get(rdev
, &ring
->doorbell_page_num
);
7864 ring
= &rdev
->ring
[R600_RING_TYPE_DMA_INDEX
];
7865 ring
->ring_obj
= NULL
;
7866 r600_ring_init(rdev
, ring
, 256 * 1024);
7868 ring
= &rdev
->ring
[CAYMAN_RING_TYPE_DMA1_INDEX
];
7869 ring
->ring_obj
= NULL
;
7870 r600_ring_init(rdev
, ring
, 256 * 1024);
7872 r
= radeon_uvd_init(rdev
);
7874 ring
= &rdev
->ring
[R600_RING_TYPE_UVD_INDEX
];
7875 ring
->ring_obj
= NULL
;
7876 r600_ring_init(rdev
, ring
, 4096);
7879 rdev
->ih
.ring_obj
= NULL
;
7880 r600_ih_ring_init(rdev
, 64 * 1024);
7882 r
= r600_pcie_gart_init(rdev
);
7886 rdev
->accel_working
= true;
7887 r
= cik_startup(rdev
);
7889 dev_err(rdev
->dev
, "disabling GPU acceleration\n");
7891 cik_sdma_fini(rdev
);
7893 sumo_rlc_fini(rdev
);
7895 radeon_wb_fini(rdev
);
7896 radeon_ib_pool_fini(rdev
);
7897 radeon_vm_manager_fini(rdev
);
7898 radeon_irq_kms_fini(rdev
);
7899 cik_pcie_gart_fini(rdev
);
7900 rdev
->accel_working
= false;
	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
7947 /* display watermark setup */
7949 * dce8_line_buffer_adjust - Set up the line buffer
7951 * @rdev: radeon_device pointer
7952 * @radeon_crtc: the selected display controller
7953 * @mode: the current display mode on the selected display
 * Set up the line buffer allocation for
7957 * the selected display controller (CIK).
7958 * Returns the line buffer size in pixels.
7960 static u32
dce8_line_buffer_adjust(struct radeon_device
*rdev
,
7961 struct radeon_crtc
*radeon_crtc
,
7962 struct drm_display_mode
*mode
)
	/*
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB.  Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
7974 if (radeon_crtc
->base
.enabled
&& mode
) {
7975 if (mode
->crtc_hdisplay
< 1920)
7977 else if (mode
->crtc_hdisplay
< 2560)
7979 else if (mode
->crtc_hdisplay
< 4096)
7982 DRM_DEBUG_KMS("Mode too big for LB!\n");
7988 WREG32(LB_MEMORY_CTRL
+ radeon_crtc
->crtc_offset
,
7989 LB_MEMORY_CONFIG(tmp
) | LB_MEMORY_SIZE(0x6B0));
7991 if (radeon_crtc
->base
.enabled
&& mode
) {
8003 /* controller not enabled, so no lb used */
8008 * cik_get_number_of_dram_channels - get the number of dram channels
8010 * @rdev: radeon_device pointer
8012 * Look up the number of video ram channels (CIK).
8013 * Used for display watermark bandwidth calculations
8014 * Returns the number of dram channels
8016 static u32
cik_get_number_of_dram_channels(struct radeon_device
*rdev
)
8018 u32 tmp
= RREG32(MC_SHARED_CHMAP
);
8020 switch ((tmp
& NOOFCHAN_MASK
) >> NOOFCHAN_SHIFT
) {
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
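/*
 * Worked example for the fixed-point math above (illustrative numbers only):
 * with wm->yclk = 1000000 (a 1 GHz effective rate per pin, in kHz) and
 * wm->dram_channels = 2, yclk/1000 = 1000, dram_channels * 4 = 8, and the
 * efficiency factor is 7/10, so the raw bandwidth works out to
 * 1000 * 8 * 0.7 = 5600 MBytes/s.
 */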
8089 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8091 * @wm: watermark calculation data
8093 * Calculate the dram bandwidth used for display (CIK).
8094 * Used for display watermark bandwidth calculations
8095 * Returns the dram bandwidth for display in MBytes/s
8097 static u32
dce8_dram_bandwidth_for_display(struct dce8_wm_params
*wm
)
8099 /* Calculate DRAM Bandwidth and the part allocated to display. */
8100 fixed20_12 disp_dram_allocation
; /* 0.3 to 0.7 */
8101 fixed20_12 yclk
, dram_channels
, bandwidth
;
8104 a
.full
= dfixed_const(1000);
8105 yclk
.full
= dfixed_const(wm
->yclk
);
8106 yclk
.full
= dfixed_div(yclk
, a
);
8107 dram_channels
.full
= dfixed_const(wm
->dram_channels
* 4);
8108 a
.full
= dfixed_const(10);
8109 disp_dram_allocation
.full
= dfixed_const(3); /* XXX worse case value 0.3 */
8110 disp_dram_allocation
.full
= dfixed_div(disp_dram_allocation
, a
);
8111 bandwidth
.full
= dfixed_mul(dram_channels
, yclk
);
8112 bandwidth
.full
= dfixed_mul(bandwidth
, disp_dram_allocation
);
8114 return dfixed_trunc(bandwidth
);
8118 * dce8_data_return_bandwidth - get the data return bandwidth
8120 * @wm: watermark calculation data
8122 * Calculate the data return bandwidth used for display (CIK).
8123 * Used for display watermark bandwidth calculations
8124 * Returns the data return bandwidth in MBytes/s
8126 static u32
dce8_data_return_bandwidth(struct dce8_wm_params
*wm
)
8128 /* Calculate the display Data return Bandwidth */
8129 fixed20_12 return_efficiency
; /* 0.8 */
8130 fixed20_12 sclk
, bandwidth
;
8133 a
.full
= dfixed_const(1000);
8134 sclk
.full
= dfixed_const(wm
->sclk
);
8135 sclk
.full
= dfixed_div(sclk
, a
);
8136 a
.full
= dfixed_const(10);
8137 return_efficiency
.full
= dfixed_const(8);
8138 return_efficiency
.full
= dfixed_div(return_efficiency
, a
);
8139 a
.full
= dfixed_const(32);
8140 bandwidth
.full
= dfixed_mul(a
, sclk
);
8141 bandwidth
.full
= dfixed_mul(bandwidth
, return_efficiency
);
8143 return dfixed_trunc(bandwidth
);
8147 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8149 * @wm: watermark calculation data
8151 * Calculate the dmif bandwidth used for display (CIK).
8152 * Used for display watermark bandwidth calculations
8153 * Returns the dmif bandwidth in MBytes/s
8155 static u32
dce8_dmif_request_bandwidth(struct dce8_wm_params
*wm
)
8157 /* Calculate the DMIF Request Bandwidth */
8158 fixed20_12 disp_clk_request_efficiency
; /* 0.8 */
8159 fixed20_12 disp_clk
, bandwidth
;
8162 a
.full
= dfixed_const(1000);
8163 disp_clk
.full
= dfixed_const(wm
->disp_clk
);
8164 disp_clk
.full
= dfixed_div(disp_clk
, a
);
8165 a
.full
= dfixed_const(32);
8166 b
.full
= dfixed_mul(a
, disp_clk
);
8168 a
.full
= dfixed_const(10);
8169 disp_clk_request_efficiency
.full
= dfixed_const(8);
8170 disp_clk_request_efficiency
.full
= dfixed_div(disp_clk_request_efficiency
, a
);
8172 bandwidth
.full
= dfixed_mul(b
, disp_clk_request_efficiency
);
8174 return dfixed_trunc(bandwidth
);
/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
8197 * dce8_average_bandwidth - get the average available bandwidth
8199 * @wm: watermark calculation data
8201 * Calculate the average available bandwidth used for display (CIK).
8202 * Used for display watermark bandwidth calculations
8203 * Returns the average available bandwidth in MBytes/s
8205 static u32
dce8_average_bandwidth(struct dce8_wm_params
*wm
)
8207 /* Calculate the display mode Average Bandwidth
8208 * DisplayMode should contain the source and destination dimensions,
8212 fixed20_12 line_time
;
8213 fixed20_12 src_width
;
8214 fixed20_12 bandwidth
;
8217 a
.full
= dfixed_const(1000);
8218 line_time
.full
= dfixed_const(wm
->active_time
+ wm
->blank_time
);
8219 line_time
.full
= dfixed_div(line_time
, a
);
8220 bpp
.full
= dfixed_const(wm
->bytes_per_pixel
);
8221 src_width
.full
= dfixed_const(wm
->src_width
);
8222 bandwidth
.full
= dfixed_mul(src_width
, bpp
);
8223 bandwidth
.full
= dfixed_mul(bandwidth
, wm
->vsc
);
8224 bandwidth
.full
= dfixed_div(bandwidth
, line_time
);
8226 return dfixed_trunc(bandwidth
);
8230 * dce8_latency_watermark - get the latency watermark
8232 * @wm: watermark calculation data
8234 * Calculate the latency watermark (CIK).
8235 * Used for display watermark bandwidth calculations
8236 * Returns the latency watermark in ns
8238 static u32
dce8_latency_watermark(struct dce8_wm_params
*wm
)
8240 /* First calculate the latency in ns */
8241 u32 mc_latency
= 2000; /* 2000 ns. */
8242 u32 available_bandwidth
= dce8_available_bandwidth(wm
);
8243 u32 worst_chunk_return_time
= (512 * 8 * 1000) / available_bandwidth
;
8244 u32 cursor_line_pair_return_time
= (128 * 4 * 1000) / available_bandwidth
;
8245 u32 dc_latency
= 40000000 / wm
->disp_clk
; /* dc pipe latency */
8246 u32 other_heads_data_return_time
= ((wm
->num_heads
+ 1) * worst_chunk_return_time
) +
8247 (wm
->num_heads
* cursor_line_pair_return_time
);
8248 u32 latency
= mc_latency
+ other_heads_data_return_time
+ dc_latency
;
8249 u32 max_src_lines_per_dst_line
, lb_fill_bw
, line_fill_time
;
8250 u32 tmp
, dmif_size
= 12288;
8253 if (wm
->num_heads
== 0)
8256 a
.full
= dfixed_const(2);
8257 b
.full
= dfixed_const(1);
8258 if ((wm
->vsc
.full
> a
.full
) ||
8259 ((wm
->vsc
.full
> b
.full
) && (wm
->vtaps
>= 3)) ||
8261 ((wm
->vsc
.full
>= a
.full
) && wm
->interlaced
))
8262 max_src_lines_per_dst_line
= 4;
8264 max_src_lines_per_dst_line
= 2;
8266 a
.full
= dfixed_const(available_bandwidth
);
8267 b
.full
= dfixed_const(wm
->num_heads
);
8268 a
.full
= dfixed_div(a
, b
);
8270 b
.full
= dfixed_const(mc_latency
+ 512);
8271 c
.full
= dfixed_const(wm
->disp_clk
);
8272 b
.full
= dfixed_div(b
, c
);
8274 c
.full
= dfixed_const(dmif_size
);
8275 b
.full
= dfixed_div(c
, b
);
8277 tmp
= min(dfixed_trunc(a
), dfixed_trunc(b
));
8279 b
.full
= dfixed_const(1000);
8280 c
.full
= dfixed_const(wm
->disp_clk
);
8281 b
.full
= dfixed_div(c
, b
);
8282 c
.full
= dfixed_const(wm
->bytes_per_pixel
);
8283 b
.full
= dfixed_mul(b
, c
);
8285 lb_fill_bw
= min(tmp
, dfixed_trunc(b
));
8287 a
.full
= dfixed_const(max_src_lines_per_dst_line
* wm
->src_width
* wm
->bytes_per_pixel
);
8288 b
.full
= dfixed_const(1000);
8289 c
.full
= dfixed_const(lb_fill_bw
);
8290 b
.full
= dfixed_div(c
, b
);
8291 a
.full
= dfixed_div(a
, b
);
8292 line_fill_time
= dfixed_trunc(a
);
8294 if (line_fill_time
< wm
->active_time
)
8297 return latency
+ (line_fill_time
- wm
->active_time
);
8302 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8303 * average and available dram bandwidth
8305 * @wm: watermark calculation data
8307 * Check if the display average bandwidth fits in the display
8308 * dram bandwidth (CIK).
8309 * Used for display watermark bandwidth calculations
8310 * Returns true if the display fits, false if not.
8312 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params
*wm
)
8314 if (dce8_average_bandwidth(wm
) <=
8315 (dce8_dram_bandwidth_for_display(wm
) / wm
->num_heads
))
8322 * dce8_average_bandwidth_vs_available_bandwidth - check
8323 * average and available bandwidth
8325 * @wm: watermark calculation data
8327 * Check if the display average bandwidth fits in the display
8328 * available bandwidth (CIK).
8329 * Used for display watermark bandwidth calculations
8330 * Returns true if the display fits, false if not.
8332 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params
*wm
)
8334 if (dce8_average_bandwidth(wm
) <=
8335 (dce8_available_bandwidth(wm
) / wm
->num_heads
))
8342 * dce8_check_latency_hiding - check latency hiding
8344 * @wm: watermark calculation data
8346 * Check latency hiding (CIK).
8347 * Used for display watermark bandwidth calculations
8348 * Returns true if the display fits, false if not.
8350 static bool dce8_check_latency_hiding(struct dce8_wm_params
*wm
)
8352 u32 lb_partitions
= wm
->lb_size
/ wm
->src_width
;
8353 u32 line_time
= wm
->active_time
+ wm
->blank_time
;
8354 u32 latency_tolerant_lines
;
8358 a
.full
= dfixed_const(1);
8359 if (wm
->vsc
.full
> a
.full
)
8360 latency_tolerant_lines
= 1;
8362 if (lb_partitions
<= (wm
->vtaps
+ 1))
8363 latency_tolerant_lines
= 1;
8365 latency_tolerant_lines
= 2;
8368 latency_hiding
= (latency_tolerant_lines
* line_time
+ wm
->blank_time
);
8370 if (dce8_latency_watermark(wm
) <= latency_hiding
)
8377 * dce8_program_watermarks - program display watermarks
8379 * @rdev: radeon_device pointer
8380 * @radeon_crtc: the selected display controller
8381 * @lb_size: line buffer size
8382 * @num_heads: number of display controllers in use
8384 * Calculate and program the display watermarks for the
8385 * selected display controller (CIK).
8387 static void dce8_program_watermarks(struct radeon_device
*rdev
,
8388 struct radeon_crtc
*radeon_crtc
,
8389 u32 lb_size
, u32 num_heads
)
8391 struct drm_display_mode
*mode
= &radeon_crtc
->base
.mode
;
8392 struct dce8_wm_params wm_low
, wm_high
;
8395 u32 latency_watermark_a
= 0, latency_watermark_b
= 0;
8398 if (radeon_crtc
->base
.enabled
&& num_heads
&& mode
) {
8399 pixel_period
= 1000000 / (u32
)mode
->clock
;
8400 line_time
= min((u32
)mode
->crtc_htotal
* pixel_period
, (u32
)65535);
8402 /* watermark for high clocks */
8403 if ((rdev
->pm
.pm_method
== PM_METHOD_DPM
) &&
8404 rdev
->pm
.dpm_enabled
) {
8406 radeon_dpm_get_mclk(rdev
, false) * 10;
8408 radeon_dpm_get_sclk(rdev
, false) * 10;
8410 wm_high
.yclk
= rdev
->pm
.current_mclk
* 10;
8411 wm_high
.sclk
= rdev
->pm
.current_sclk
* 10;
8414 wm_high
.disp_clk
= mode
->clock
;
8415 wm_high
.src_width
= mode
->crtc_hdisplay
;
8416 wm_high
.active_time
= mode
->crtc_hdisplay
* pixel_period
;
8417 wm_high
.blank_time
= line_time
- wm_high
.active_time
;
8418 wm_high
.interlaced
= false;
8419 if (mode
->flags
& DRM_MODE_FLAG_INTERLACE
)
8420 wm_high
.interlaced
= true;
8421 wm_high
.vsc
= radeon_crtc
->vsc
;
8423 if (radeon_crtc
->rmx_type
!= RMX_OFF
)
8425 wm_high
.bytes_per_pixel
= 4; /* XXX: get this from fb config */
8426 wm_high
.lb_size
= lb_size
;
8427 wm_high
.dram_channels
= cik_get_number_of_dram_channels(rdev
);
8428 wm_high
.num_heads
= num_heads
;
8430 /* set for high clocks */
8431 latency_watermark_a
= min(dce8_latency_watermark(&wm_high
), (u32
)65535);
8433 /* possibly force display priority to high */
8434 /* should really do this at mode validation time... */
8435 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high
) ||
8436 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high
) ||
8437 !dce8_check_latency_hiding(&wm_high
) ||
8438 (rdev
->disp_priority
== 2)) {
8439 DRM_DEBUG_KMS("force priority to high\n");
8442 /* watermark for low clocks */
8443 if ((rdev
->pm
.pm_method
== PM_METHOD_DPM
) &&
8444 rdev
->pm
.dpm_enabled
) {
8446 radeon_dpm_get_mclk(rdev
, true) * 10;
8448 radeon_dpm_get_sclk(rdev
, true) * 10;
8450 wm_low
.yclk
= rdev
->pm
.current_mclk
* 10;
8451 wm_low
.sclk
= rdev
->pm
.current_sclk
* 10;
8454 wm_low
.disp_clk
= mode
->clock
;
8455 wm_low
.src_width
= mode
->crtc_hdisplay
;
8456 wm_low
.active_time
= mode
->crtc_hdisplay
* pixel_period
;
8457 wm_low
.blank_time
= line_time
- wm_low
.active_time
;
8458 wm_low
.interlaced
= false;
8459 if (mode
->flags
& DRM_MODE_FLAG_INTERLACE
)
8460 wm_low
.interlaced
= true;
8461 wm_low
.vsc
= radeon_crtc
->vsc
;
8463 if (radeon_crtc
->rmx_type
!= RMX_OFF
)
8465 wm_low
.bytes_per_pixel
= 4; /* XXX: get this from fb config */
8466 wm_low
.lb_size
= lb_size
;
8467 wm_low
.dram_channels
= cik_get_number_of_dram_channels(rdev
);
8468 wm_low
.num_heads
= num_heads
;
8470 /* set for low clocks */
8471 latency_watermark_b
= min(dce8_latency_watermark(&wm_low
), (u32
)65535);
8473 /* possibly force display priority to high */
8474 /* should really do this at mode validation time... */
8475 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low
) ||
8476 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low
) ||
8477 !dce8_check_latency_hiding(&wm_low
) ||
8478 (rdev
->disp_priority
== 2)) {
8479 DRM_DEBUG_KMS("force priority to high\n");
	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}
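/*
 * Note: this is normally reached through the asic ->bandwidth_update hook
 * whenever display configuration or memory clocks change, so every CRTC
 * gets its line buffer share and both watermark sets reprogrammed together.
 */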
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
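#if 0
/*
 * Illustrative sketch only (not built): the snapshot above can be used to
 * time an interval in GPU clock ticks by sampling the counter twice.  The
 * helper name is hypothetical and not part of the driver.
 */
static uint64_t cik_gpu_ticks_for_100us(struct radeon_device *rdev)
{
	uint64_t start = cik_get_gpu_clock_counter(rdev);

	udelay(100);
	return cik_get_gpu_clock_counter(rdev) - start;
}
#endif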
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
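/*
 * The DCLK_STATUS poll in cik_set_uvd_clock() waits up to roughly one
 * second (100 iterations of 10 ms) for the SMC to report the new post
 * divider as active before giving up with -ETIMEDOUT.
 */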
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}
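/*
 * VCLK (video clock) and DCLK (decoder clock) are programmed independently;
 * the first failure aborts and the error is returned to the UVD code that
 * requested the clock change.
 */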
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}
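	/*
	 * LC_CURRENT_DATA_RATE encodes the currently negotiated speed
	 * (2 = 8.0 GT/s gen3, 1 = 5.0 GT/s gen2, 0 = 2.5 GT/s gen1),
	 * hence the early exits above when the requested speed is already
	 * in effect.
	 */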
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;
	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}
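	/*
	 * The retry loop above follows the gen3 retraining sequence:
	 * snapshot the bridge and GPU LNKCTL/LNKCTL2 state, assert
	 * LC_SET_QUIESCE and LC_REDO_EQ to rerun link equalization, then
	 * restore the saved HAWD and LNKCTL2 bits before releasing
	 * quiesce again.
	 */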
	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
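	/*
	 * The low bits of PCI_EXP_LNKCTL2 hold the target link speed
	 * (1 = 2.5 GT/s, 2 = 5.0 GT/s, 3 = 8.0 GT/s); the speed change
	 * initiated below trains the link to that rate.
	 */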
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
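		/*
		 * LC_L0S_INACTIVITY(7)/LC_L1_INACTIVITY(7) program the
		 * inactivity thresholds used before the link may enter
		 * L0s/L1; once L1 is allowed, the PMI-to-L1 disable bit is
		 * cleared again.
		 */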
		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}
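			/*
			 * CLKREQ#-based clock power management is only used
			 * when the upstream bridge advertises
			 * PCI_EXP_LNKCAP_CLKPM support.
			 */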
			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);