/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
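/*
 * Editor's note: both helpers above return the temperature in millidegrees
 * Celsius (degrees C * 1000), which is the convention the hwmon/thermal code
 * expects.  ci_get_temp() decodes the CTF field read through the SMC
 * register space, while kv_get_temp() converts the raw APU sensor value as
 * (raw / 8) - 49 degrees C before scaling.
 */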
/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
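/*
 * Editor's note (assumption): each entry in the RLC save/restore lists below
 * appears to pack an instance-select value in the upper 16 bits (0x0e00
 * looking like the broadcast/global form, 0x4e00..0xbe00 selecting
 * individual shader-engine instances) and the register's dword offset
 * (byte address >> 2) in the lower bits, which is why every register
 * address is shifted right by two.
 */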
static const u32 spectre_rlc_save_restore_register_list[] =
{
133 (0x0e00 << 16) | (0xc12c >> 2),
135 (0x0e00 << 16) | (0xc140 >> 2),
137 (0x0e00 << 16) | (0xc150 >> 2),
139 (0x0e00 << 16) | (0xc15c >> 2),
141 (0x0e00 << 16) | (0xc168 >> 2),
143 (0x0e00 << 16) | (0xc170 >> 2),
145 (0x0e00 << 16) | (0xc178 >> 2),
147 (0x0e00 << 16) | (0xc204 >> 2),
149 (0x0e00 << 16) | (0xc2b4 >> 2),
151 (0x0e00 << 16) | (0xc2b8 >> 2),
153 (0x0e00 << 16) | (0xc2bc >> 2),
155 (0x0e00 << 16) | (0xc2c0 >> 2),
157 (0x0e00 << 16) | (0x8228 >> 2),
159 (0x0e00 << 16) | (0x829c >> 2),
161 (0x0e00 << 16) | (0x869c >> 2),
163 (0x0600 << 16) | (0x98f4 >> 2),
165 (0x0e00 << 16) | (0x98f8 >> 2),
167 (0x0e00 << 16) | (0x9900 >> 2),
169 (0x0e00 << 16) | (0xc260 >> 2),
171 (0x0e00 << 16) | (0x90e8 >> 2),
173 (0x0e00 << 16) | (0x3c000 >> 2),
175 (0x0e00 << 16) | (0x3c00c >> 2),
177 (0x0e00 << 16) | (0x8c1c >> 2),
179 (0x0e00 << 16) | (0x9700 >> 2),
181 (0x0e00 << 16) | (0xcd20 >> 2),
183 (0x4e00 << 16) | (0xcd20 >> 2),
185 (0x5e00 << 16) | (0xcd20 >> 2),
187 (0x6e00 << 16) | (0xcd20 >> 2),
189 (0x7e00 << 16) | (0xcd20 >> 2),
191 (0x8e00 << 16) | (0xcd20 >> 2),
193 (0x9e00 << 16) | (0xcd20 >> 2),
195 (0xae00 << 16) | (0xcd20 >> 2),
197 (0xbe00 << 16) | (0xcd20 >> 2),
199 (0x0e00 << 16) | (0x89bc >> 2),
201 (0x0e00 << 16) | (0x8900 >> 2),
204 (0x0e00 << 16) | (0xc130 >> 2),
206 (0x0e00 << 16) | (0xc134 >> 2),
208 (0x0e00 << 16) | (0xc1fc >> 2),
210 (0x0e00 << 16) | (0xc208 >> 2),
212 (0x0e00 << 16) | (0xc264 >> 2),
214 (0x0e00 << 16) | (0xc268 >> 2),
216 (0x0e00 << 16) | (0xc26c >> 2),
218 (0x0e00 << 16) | (0xc270 >> 2),
220 (0x0e00 << 16) | (0xc274 >> 2),
222 (0x0e00 << 16) | (0xc278 >> 2),
224 (0x0e00 << 16) | (0xc27c >> 2),
226 (0x0e00 << 16) | (0xc280 >> 2),
228 (0x0e00 << 16) | (0xc284 >> 2),
230 (0x0e00 << 16) | (0xc288 >> 2),
232 (0x0e00 << 16) | (0xc28c >> 2),
234 (0x0e00 << 16) | (0xc290 >> 2),
236 (0x0e00 << 16) | (0xc294 >> 2),
238 (0x0e00 << 16) | (0xc298 >> 2),
240 (0x0e00 << 16) | (0xc29c >> 2),
242 (0x0e00 << 16) | (0xc2a0 >> 2),
244 (0x0e00 << 16) | (0xc2a4 >> 2),
246 (0x0e00 << 16) | (0xc2a8 >> 2),
248 (0x0e00 << 16) | (0xc2ac >> 2),
250 (0x0e00 << 16) | (0xc2b0 >> 2),
252 (0x0e00 << 16) | (0x301d0 >> 2),
254 (0x0e00 << 16) | (0x30238 >> 2),
256 (0x0e00 << 16) | (0x30250 >> 2),
258 (0x0e00 << 16) | (0x30254 >> 2),
260 (0x0e00 << 16) | (0x30258 >> 2),
262 (0x0e00 << 16) | (0x3025c >> 2),
264 (0x4e00 << 16) | (0xc900 >> 2),
266 (0x5e00 << 16) | (0xc900 >> 2),
268 (0x6e00 << 16) | (0xc900 >> 2),
270 (0x7e00 << 16) | (0xc900 >> 2),
272 (0x8e00 << 16) | (0xc900 >> 2),
274 (0x9e00 << 16) | (0xc900 >> 2),
276 (0xae00 << 16) | (0xc900 >> 2),
278 (0xbe00 << 16) | (0xc900 >> 2),
280 (0x4e00 << 16) | (0xc904 >> 2),
282 (0x5e00 << 16) | (0xc904 >> 2),
284 (0x6e00 << 16) | (0xc904 >> 2),
286 (0x7e00 << 16) | (0xc904 >> 2),
288 (0x8e00 << 16) | (0xc904 >> 2),
290 (0x9e00 << 16) | (0xc904 >> 2),
292 (0xae00 << 16) | (0xc904 >> 2),
294 (0xbe00 << 16) | (0xc904 >> 2),
296 (0x4e00 << 16) | (0xc908 >> 2),
298 (0x5e00 << 16) | (0xc908 >> 2),
300 (0x6e00 << 16) | (0xc908 >> 2),
302 (0x7e00 << 16) | (0xc908 >> 2),
304 (0x8e00 << 16) | (0xc908 >> 2),
306 (0x9e00 << 16) | (0xc908 >> 2),
308 (0xae00 << 16) | (0xc908 >> 2),
310 (0xbe00 << 16) | (0xc908 >> 2),
312 (0x4e00 << 16) | (0xc90c >> 2),
314 (0x5e00 << 16) | (0xc90c >> 2),
316 (0x6e00 << 16) | (0xc90c >> 2),
318 (0x7e00 << 16) | (0xc90c >> 2),
320 (0x8e00 << 16) | (0xc90c >> 2),
322 (0x9e00 << 16) | (0xc90c >> 2),
324 (0xae00 << 16) | (0xc90c >> 2),
326 (0xbe00 << 16) | (0xc90c >> 2),
328 (0x4e00 << 16) | (0xc910 >> 2),
330 (0x5e00 << 16) | (0xc910 >> 2),
332 (0x6e00 << 16) | (0xc910 >> 2),
334 (0x7e00 << 16) | (0xc910 >> 2),
336 (0x8e00 << 16) | (0xc910 >> 2),
338 (0x9e00 << 16) | (0xc910 >> 2),
340 (0xae00 << 16) | (0xc910 >> 2),
342 (0xbe00 << 16) | (0xc910 >> 2),
344 (0x0e00 << 16) | (0xc99c >> 2),
346 (0x0e00 << 16) | (0x9834 >> 2),
348 (0x0000 << 16) | (0x30f00 >> 2),
350 (0x0001 << 16) | (0x30f00 >> 2),
352 (0x0000 << 16) | (0x30f04 >> 2),
354 (0x0001 << 16) | (0x30f04 >> 2),
356 (0x0000 << 16) | (0x30f08 >> 2),
358 (0x0001 << 16) | (0x30f08 >> 2),
360 (0x0000 << 16) | (0x30f0c >> 2),
362 (0x0001 << 16) | (0x30f0c >> 2),
364 (0x0600 << 16) | (0x9b7c >> 2),
366 (0x0e00 << 16) | (0x8a14 >> 2),
368 (0x0e00 << 16) | (0x8a18 >> 2),
370 (0x0600 << 16) | (0x30a00 >> 2),
372 (0x0e00 << 16) | (0x8bf0 >> 2),
374 (0x0e00 << 16) | (0x8bcc >> 2),
376 (0x0e00 << 16) | (0x8b24 >> 2),
378 (0x0e00 << 16) | (0x30a04 >> 2),
380 (0x0600 << 16) | (0x30a10 >> 2),
382 (0x0600 << 16) | (0x30a14 >> 2),
384 (0x0600 << 16) | (0x30a18 >> 2),
386 (0x0600 << 16) | (0x30a2c >> 2),
388 (0x0e00 << 16) | (0xc700 >> 2),
390 (0x0e00 << 16) | (0xc704 >> 2),
392 (0x0e00 << 16) | (0xc708 >> 2),
394 (0x0e00 << 16) | (0xc768 >> 2),
396 (0x0400 << 16) | (0xc770 >> 2),
398 (0x0400 << 16) | (0xc774 >> 2),
400 (0x0400 << 16) | (0xc778 >> 2),
402 (0x0400 << 16) | (0xc77c >> 2),
404 (0x0400 << 16) | (0xc780 >> 2),
406 (0x0400 << 16) | (0xc784 >> 2),
408 (0x0400 << 16) | (0xc788 >> 2),
410 (0x0400 << 16) | (0xc78c >> 2),
412 (0x0400 << 16) | (0xc798 >> 2),
414 (0x0400 << 16) | (0xc79c >> 2),
416 (0x0400 << 16) | (0xc7a0 >> 2),
418 (0x0400 << 16) | (0xc7a4 >> 2),
420 (0x0400 << 16) | (0xc7a8 >> 2),
422 (0x0400 << 16) | (0xc7ac >> 2),
424 (0x0400 << 16) | (0xc7b0 >> 2),
426 (0x0400 << 16) | (0xc7b4 >> 2),
428 (0x0e00 << 16) | (0x9100 >> 2),
430 (0x0e00 << 16) | (0x3c010 >> 2),
432 (0x0e00 << 16) | (0x92a8 >> 2),
434 (0x0e00 << 16) | (0x92ac >> 2),
436 (0x0e00 << 16) | (0x92b4 >> 2),
438 (0x0e00 << 16) | (0x92b8 >> 2),
440 (0x0e00 << 16) | (0x92bc >> 2),
442 (0x0e00 << 16) | (0x92c0 >> 2),
444 (0x0e00 << 16) | (0x92c4 >> 2),
446 (0x0e00 << 16) | (0x92c8 >> 2),
448 (0x0e00 << 16) | (0x92cc >> 2),
450 (0x0e00 << 16) | (0x92d0 >> 2),
452 (0x0e00 << 16) | (0x8c00 >> 2),
454 (0x0e00 << 16) | (0x8c04 >> 2),
456 (0x0e00 << 16) | (0x8c20 >> 2),
458 (0x0e00 << 16) | (0x8c38 >> 2),
460 (0x0e00 << 16) | (0x8c3c >> 2),
462 (0x0e00 << 16) | (0xae00 >> 2),
464 (0x0e00 << 16) | (0x9604 >> 2),
466 (0x0e00 << 16) | (0xac08 >> 2),
468 (0x0e00 << 16) | (0xac0c >> 2),
470 (0x0e00 << 16) | (0xac10 >> 2),
472 (0x0e00 << 16) | (0xac14 >> 2),
474 (0x0e00 << 16) | (0xac58 >> 2),
476 (0x0e00 << 16) | (0xac68 >> 2),
478 (0x0e00 << 16) | (0xac6c >> 2),
480 (0x0e00 << 16) | (0xac70 >> 2),
482 (0x0e00 << 16) | (0xac74 >> 2),
484 (0x0e00 << 16) | (0xac78 >> 2),
486 (0x0e00 << 16) | (0xac7c >> 2),
488 (0x0e00 << 16) | (0xac80 >> 2),
490 (0x0e00 << 16) | (0xac84 >> 2),
492 (0x0e00 << 16) | (0xac88 >> 2),
494 (0x0e00 << 16) | (0xac8c >> 2),
496 (0x0e00 << 16) | (0x970c >> 2),
498 (0x0e00 << 16) | (0x9714 >> 2),
500 (0x0e00 << 16) | (0x9718 >> 2),
502 (0x0e00 << 16) | (0x971c >> 2),
504 (0x0e00 << 16) | (0x31068 >> 2),
506 (0x4e00 << 16) | (0x31068 >> 2),
508 (0x5e00 << 16) | (0x31068 >> 2),
510 (0x6e00 << 16) | (0x31068 >> 2),
512 (0x7e00 << 16) | (0x31068 >> 2),
514 (0x8e00 << 16) | (0x31068 >> 2),
516 (0x9e00 << 16) | (0x31068 >> 2),
518 (0xae00 << 16) | (0x31068 >> 2),
520 (0xbe00 << 16) | (0x31068 >> 2),
522 (0x0e00 << 16) | (0xcd10 >> 2),
524 (0x0e00 << 16) | (0xcd14 >> 2),
526 (0x0e00 << 16) | (0x88b0 >> 2),
528 (0x0e00 << 16) | (0x88b4 >> 2),
530 (0x0e00 << 16) | (0x88b8 >> 2),
532 (0x0e00 << 16) | (0x88bc >> 2),
534 (0x0400 << 16) | (0x89c0 >> 2),
536 (0x0e00 << 16) | (0x88c4 >> 2),
538 (0x0e00 << 16) | (0x88c8 >> 2),
540 (0x0e00 << 16) | (0x88d0 >> 2),
542 (0x0e00 << 16) | (0x88d4 >> 2),
544 (0x0e00 << 16) | (0x88d8 >> 2),
546 (0x0e00 << 16) | (0x8980 >> 2),
548 (0x0e00 << 16) | (0x30938 >> 2),
550 (0x0e00 << 16) | (0x3093c >> 2),
552 (0x0e00 << 16) | (0x30940 >> 2),
554 (0x0e00 << 16) | (0x89a0 >> 2),
556 (0x0e00 << 16) | (0x30900 >> 2),
558 (0x0e00 << 16) | (0x30904 >> 2),
560 (0x0e00 << 16) | (0x89b4 >> 2),
562 (0x0e00 << 16) | (0x3c210 >> 2),
564 (0x0e00 << 16) | (0x3c214 >> 2),
566 (0x0e00 << 16) | (0x3c218 >> 2),
568 (0x0e00 << 16) | (0x8904 >> 2),
571 (0x0e00 << 16) | (0x8c28 >> 2),
572 (0x0e00 << 16) | (0x8c2c >> 2),
573 (0x0e00 << 16) | (0x8c30 >> 2),
574 (0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
static const u32 kalindi_rlc_save_restore_register_list[] =
{
580 (0x0e00 << 16) | (0xc12c >> 2),
582 (0x0e00 << 16) | (0xc140 >> 2),
584 (0x0e00 << 16) | (0xc150 >> 2),
586 (0x0e00 << 16) | (0xc15c >> 2),
588 (0x0e00 << 16) | (0xc168 >> 2),
590 (0x0e00 << 16) | (0xc170 >> 2),
592 (0x0e00 << 16) | (0xc204 >> 2),
594 (0x0e00 << 16) | (0xc2b4 >> 2),
596 (0x0e00 << 16) | (0xc2b8 >> 2),
598 (0x0e00 << 16) | (0xc2bc >> 2),
600 (0x0e00 << 16) | (0xc2c0 >> 2),
602 (0x0e00 << 16) | (0x8228 >> 2),
604 (0x0e00 << 16) | (0x829c >> 2),
606 (0x0e00 << 16) | (0x869c >> 2),
608 (0x0600 << 16) | (0x98f4 >> 2),
610 (0x0e00 << 16) | (0x98f8 >> 2),
612 (0x0e00 << 16) | (0x9900 >> 2),
614 (0x0e00 << 16) | (0xc260 >> 2),
616 (0x0e00 << 16) | (0x90e8 >> 2),
618 (0x0e00 << 16) | (0x3c000 >> 2),
620 (0x0e00 << 16) | (0x3c00c >> 2),
622 (0x0e00 << 16) | (0x8c1c >> 2),
624 (0x0e00 << 16) | (0x9700 >> 2),
626 (0x0e00 << 16) | (0xcd20 >> 2),
628 (0x4e00 << 16) | (0xcd20 >> 2),
630 (0x5e00 << 16) | (0xcd20 >> 2),
632 (0x6e00 << 16) | (0xcd20 >> 2),
634 (0x7e00 << 16) | (0xcd20 >> 2),
636 (0x0e00 << 16) | (0x89bc >> 2),
638 (0x0e00 << 16) | (0x8900 >> 2),
641 (0x0e00 << 16) | (0xc130 >> 2),
643 (0x0e00 << 16) | (0xc134 >> 2),
645 (0x0e00 << 16) | (0xc1fc >> 2),
647 (0x0e00 << 16) | (0xc208 >> 2),
649 (0x0e00 << 16) | (0xc264 >> 2),
651 (0x0e00 << 16) | (0xc268 >> 2),
653 (0x0e00 << 16) | (0xc26c >> 2),
655 (0x0e00 << 16) | (0xc270 >> 2),
657 (0x0e00 << 16) | (0xc274 >> 2),
659 (0x0e00 << 16) | (0xc28c >> 2),
661 (0x0e00 << 16) | (0xc290 >> 2),
663 (0x0e00 << 16) | (0xc294 >> 2),
665 (0x0e00 << 16) | (0xc298 >> 2),
667 (0x0e00 << 16) | (0xc2a0 >> 2),
669 (0x0e00 << 16) | (0xc2a4 >> 2),
671 (0x0e00 << 16) | (0xc2a8 >> 2),
673 (0x0e00 << 16) | (0xc2ac >> 2),
675 (0x0e00 << 16) | (0x301d0 >> 2),
677 (0x0e00 << 16) | (0x30238 >> 2),
679 (0x0e00 << 16) | (0x30250 >> 2),
681 (0x0e00 << 16) | (0x30254 >> 2),
683 (0x0e00 << 16) | (0x30258 >> 2),
685 (0x0e00 << 16) | (0x3025c >> 2),
687 (0x4e00 << 16) | (0xc900 >> 2),
689 (0x5e00 << 16) | (0xc900 >> 2),
691 (0x6e00 << 16) | (0xc900 >> 2),
693 (0x7e00 << 16) | (0xc900 >> 2),
695 (0x4e00 << 16) | (0xc904 >> 2),
697 (0x5e00 << 16) | (0xc904 >> 2),
699 (0x6e00 << 16) | (0xc904 >> 2),
701 (0x7e00 << 16) | (0xc904 >> 2),
703 (0x4e00 << 16) | (0xc908 >> 2),
705 (0x5e00 << 16) | (0xc908 >> 2),
707 (0x6e00 << 16) | (0xc908 >> 2),
709 (0x7e00 << 16) | (0xc908 >> 2),
711 (0x4e00 << 16) | (0xc90c >> 2),
713 (0x5e00 << 16) | (0xc90c >> 2),
715 (0x6e00 << 16) | (0xc90c >> 2),
717 (0x7e00 << 16) | (0xc90c >> 2),
719 (0x4e00 << 16) | (0xc910 >> 2),
721 (0x5e00 << 16) | (0xc910 >> 2),
723 (0x6e00 << 16) | (0xc910 >> 2),
725 (0x7e00 << 16) | (0xc910 >> 2),
727 (0x0e00 << 16) | (0xc99c >> 2),
729 (0x0e00 << 16) | (0x9834 >> 2),
731 (0x0000 << 16) | (0x30f00 >> 2),
733 (0x0000 << 16) | (0x30f04 >> 2),
735 (0x0000 << 16) | (0x30f08 >> 2),
737 (0x0000 << 16) | (0x30f0c >> 2),
739 (0x0600 << 16) | (0x9b7c >> 2),
741 (0x0e00 << 16) | (0x8a14 >> 2),
743 (0x0e00 << 16) | (0x8a18 >> 2),
745 (0x0600 << 16) | (0x30a00 >> 2),
747 (0x0e00 << 16) | (0x8bf0 >> 2),
749 (0x0e00 << 16) | (0x8bcc >> 2),
751 (0x0e00 << 16) | (0x8b24 >> 2),
753 (0x0e00 << 16) | (0x30a04 >> 2),
755 (0x0600 << 16) | (0x30a10 >> 2),
757 (0x0600 << 16) | (0x30a14 >> 2),
759 (0x0600 << 16) | (0x30a18 >> 2),
761 (0x0600 << 16) | (0x30a2c >> 2),
763 (0x0e00 << 16) | (0xc700 >> 2),
765 (0x0e00 << 16) | (0xc704 >> 2),
767 (0x0e00 << 16) | (0xc708 >> 2),
769 (0x0e00 << 16) | (0xc768 >> 2),
771 (0x0400 << 16) | (0xc770 >> 2),
773 (0x0400 << 16) | (0xc774 >> 2),
775 (0x0400 << 16) | (0xc798 >> 2),
777 (0x0400 << 16) | (0xc79c >> 2),
779 (0x0e00 << 16) | (0x9100 >> 2),
781 (0x0e00 << 16) | (0x3c010 >> 2),
783 (0x0e00 << 16) | (0x8c00 >> 2),
785 (0x0e00 << 16) | (0x8c04 >> 2),
787 (0x0e00 << 16) | (0x8c20 >> 2),
789 (0x0e00 << 16) | (0x8c38 >> 2),
791 (0x0e00 << 16) | (0x8c3c >> 2),
793 (0x0e00 << 16) | (0xae00 >> 2),
795 (0x0e00 << 16) | (0x9604 >> 2),
797 (0x0e00 << 16) | (0xac08 >> 2),
799 (0x0e00 << 16) | (0xac0c >> 2),
801 (0x0e00 << 16) | (0xac10 >> 2),
803 (0x0e00 << 16) | (0xac14 >> 2),
805 (0x0e00 << 16) | (0xac58 >> 2),
807 (0x0e00 << 16) | (0xac68 >> 2),
809 (0x0e00 << 16) | (0xac6c >> 2),
811 (0x0e00 << 16) | (0xac70 >> 2),
813 (0x0e00 << 16) | (0xac74 >> 2),
815 (0x0e00 << 16) | (0xac78 >> 2),
817 (0x0e00 << 16) | (0xac7c >> 2),
819 (0x0e00 << 16) | (0xac80 >> 2),
821 (0x0e00 << 16) | (0xac84 >> 2),
823 (0x0e00 << 16) | (0xac88 >> 2),
825 (0x0e00 << 16) | (0xac8c >> 2),
827 (0x0e00 << 16) | (0x970c >> 2),
829 (0x0e00 << 16) | (0x9714 >> 2),
831 (0x0e00 << 16) | (0x9718 >> 2),
833 (0x0e00 << 16) | (0x971c >> 2),
835 (0x0e00 << 16) | (0x31068 >> 2),
837 (0x4e00 << 16) | (0x31068 >> 2),
839 (0x5e00 << 16) | (0x31068 >> 2),
841 (0x6e00 << 16) | (0x31068 >> 2),
843 (0x7e00 << 16) | (0x31068 >> 2),
845 (0x0e00 << 16) | (0xcd10 >> 2),
847 (0x0e00 << 16) | (0xcd14 >> 2),
849 (0x0e00 << 16) | (0x88b0 >> 2),
851 (0x0e00 << 16) | (0x88b4 >> 2),
853 (0x0e00 << 16) | (0x88b8 >> 2),
855 (0x0e00 << 16) | (0x88bc >> 2),
857 (0x0400 << 16) | (0x89c0 >> 2),
859 (0x0e00 << 16) | (0x88c4 >> 2),
861 (0x0e00 << 16) | (0x88c8 >> 2),
863 (0x0e00 << 16) | (0x88d0 >> 2),
865 (0x0e00 << 16) | (0x88d4 >> 2),
867 (0x0e00 << 16) | (0x88d8 >> 2),
869 (0x0e00 << 16) | (0x8980 >> 2),
871 (0x0e00 << 16) | (0x30938 >> 2),
873 (0x0e00 << 16) | (0x3093c >> 2),
875 (0x0e00 << 16) | (0x30940 >> 2),
877 (0x0e00 << 16) | (0x89a0 >> 2),
879 (0x0e00 << 16) | (0x30900 >> 2),
881 (0x0e00 << 16) | (0x30904 >> 2),
883 (0x0e00 << 16) | (0x89b4 >> 2),
885 (0x0e00 << 16) | (0x3e1fc >> 2),
887 (0x0e00 << 16) | (0x3c210 >> 2),
889 (0x0e00 << 16) | (0x3c214 >> 2),
891 (0x0e00 << 16) | (0x3c218 >> 2),
893 (0x0e00 << 16) | (0x8904 >> 2),
896 (0x0e00 << 16) | (0x8c28 >> 2),
897 (0x0e00 << 16) | (0x8c2c >> 2),
898 (0x0e00 << 16) | (0x8c30 >> 2),
899 (0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
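/*
 * Editor's note: the "golden register" tables below are consumed by
 * radeon_program_register_sequence() as {offset, and_mask, or_value}
 * triplets: the and_mask bits of the register at offset are cleared and the
 * or_value bits are set (an and_mask of 0xffffffff simply overwrites the
 * register with or_value).
 */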
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
static const u32 bonaire_golden_registers[] =
{
918 0x3354, 0x00000333, 0x00000333,
919 0x3350, 0x000c0fc0, 0x00040200,
920 0x9a10, 0x00010000, 0x00058208,
921 0x3c000, 0xffff1fff, 0x00140000,
922 0x3c200, 0xfdfc0fff, 0x00000100,
923 0x3c234, 0x40000000, 0x40000200,
924 0x9830, 0xffffffff, 0x00000000,
925 0x9834, 0xf00fffff, 0x00000400,
926 0x9838, 0x0002021c, 0x00020200,
927 0xc78, 0x00000080, 0x00000000,
928 0x5bb0, 0x000000f0, 0x00000070,
929 0x5bc0, 0xf0311fff, 0x80300000,
930 0x98f8, 0x73773777, 0x12010001,
931 0x350c, 0x00810000, 0x408af000,
932 0x7030, 0x31000111, 0x00000011,
933 0x2f48, 0x73773777, 0x12010001,
934 0x220c, 0x00007fb6, 0x0021a1b1,
935 0x2210, 0x00007fb6, 0x002021b1,
936 0x2180, 0x00007fb6, 0x00002191,
937 0x2218, 0x00007fb6, 0x002121b1,
938 0x221c, 0x00007fb6, 0x002021b1,
939 0x21dc, 0x00007fb6, 0x00002191,
940 0x21e0, 0x00007fb6, 0x00002191,
941 0x3628, 0x0000003f, 0x0000000a,
942 0x362c, 0x0000003f, 0x0000000a,
943 0x2ae4, 0x00073ffe, 0x000022a2,
944 0x240c, 0x000007ff, 0x00000000,
945 0x8a14, 0xf000003f, 0x00000007,
946 0x8bf0, 0x00002001, 0x00000001,
947 0x8b24, 0xffffffff, 0x00ffffff,
948 0x30a04, 0x0000ff0f, 0x00000000,
949 0x28a4c, 0x07ffffff, 0x06000000,
950 0x4d8, 0x00000fff, 0x00000100,
951 0x3e78, 0x00000001, 0x00000002,
952 0x9100, 0x03000000, 0x0362c688,
953 0x8c00, 0x000000ff, 0x00000001,
954 0xe40, 0x00001fff, 0x00001fff,
955 0x9060, 0x0000007f, 0x00000020,
956 0x9508, 0x00010000, 0x00010000,
957 0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
static const u32 bonaire_mgcg_cgcg_init[] =
{
963 0xc420, 0xffffffff, 0xfffffffc,
964 0x30800, 0xffffffff, 0xe0000000,
965 0x3c2a0, 0xffffffff, 0x00000100,
966 0x3c208, 0xffffffff, 0x00000100,
967 0x3c2c0, 0xffffffff, 0xc0000100,
968 0x3c2c8, 0xffffffff, 0xc0000100,
969 0x3c2c4, 0xffffffff, 0xc0000100,
970 0x55e4, 0xffffffff, 0x00600100,
971 0x3c280, 0xffffffff, 0x00000100,
972 0x3c214, 0xffffffff, 0x06000100,
973 0x3c220, 0xffffffff, 0x00000100,
974 0x3c218, 0xffffffff, 0x06000100,
975 0x3c204, 0xffffffff, 0x00000100,
976 0x3c2e0, 0xffffffff, 0x00000100,
977 0x3c224, 0xffffffff, 0x00000100,
978 0x3c200, 0xffffffff, 0x00000100,
979 0x3c230, 0xffffffff, 0x00000100,
980 0x3c234, 0xffffffff, 0x00000100,
981 0x3c250, 0xffffffff, 0x00000100,
982 0x3c254, 0xffffffff, 0x00000100,
983 0x3c258, 0xffffffff, 0x00000100,
984 0x3c25c, 0xffffffff, 0x00000100,
985 0x3c260, 0xffffffff, 0x00000100,
986 0x3c27c, 0xffffffff, 0x00000100,
987 0x3c278, 0xffffffff, 0x00000100,
988 0x3c210, 0xffffffff, 0x06000100,
989 0x3c290, 0xffffffff, 0x00000100,
990 0x3c274, 0xffffffff, 0x00000100,
991 0x3c2b4, 0xffffffff, 0x00000100,
992 0x3c2b0, 0xffffffff, 0x00000100,
993 0x3c270, 0xffffffff, 0x00000100,
994 0x30800, 0xffffffff, 0xe0000000,
995 0x3c020, 0xffffffff, 0x00010000,
996 0x3c024, 0xffffffff, 0x00030002,
997 0x3c028, 0xffffffff, 0x00040007,
998 0x3c02c, 0xffffffff, 0x00060005,
999 0x3c030, 0xffffffff, 0x00090008,
1000 0x3c034, 0xffffffff, 0x00010000,
1001 0x3c038, 0xffffffff, 0x00030002,
1002 0x3c03c, 0xffffffff, 0x00040007,
1003 0x3c040, 0xffffffff, 0x00060005,
1004 0x3c044, 0xffffffff, 0x00090008,
1005 0x3c048, 0xffffffff, 0x00010000,
1006 0x3c04c, 0xffffffff, 0x00030002,
1007 0x3c050, 0xffffffff, 0x00040007,
1008 0x3c054, 0xffffffff, 0x00060005,
1009 0x3c058, 0xffffffff, 0x00090008,
1010 0x3c05c, 0xffffffff, 0x00010000,
1011 0x3c060, 0xffffffff, 0x00030002,
1012 0x3c064, 0xffffffff, 0x00040007,
1013 0x3c068, 0xffffffff, 0x00060005,
1014 0x3c06c, 0xffffffff, 0x00090008,
1015 0x3c070, 0xffffffff, 0x00010000,
1016 0x3c074, 0xffffffff, 0x00030002,
1017 0x3c078, 0xffffffff, 0x00040007,
1018 0x3c07c, 0xffffffff, 0x00060005,
1019 0x3c080, 0xffffffff, 0x00090008,
1020 0x3c084, 0xffffffff, 0x00010000,
1021 0x3c088, 0xffffffff, 0x00030002,
1022 0x3c08c, 0xffffffff, 0x00040007,
1023 0x3c090, 0xffffffff, 0x00060005,
1024 0x3c094, 0xffffffff, 0x00090008,
1025 0x3c098, 0xffffffff, 0x00010000,
1026 0x3c09c, 0xffffffff, 0x00030002,
1027 0x3c0a0, 0xffffffff, 0x00040007,
1028 0x3c0a4, 0xffffffff, 0x00060005,
1029 0x3c0a8, 0xffffffff, 0x00090008,
1030 0x3c000, 0xffffffff, 0x96e00200,
1031 0x8708, 0xffffffff, 0x00900100,
1032 0xc424, 0xffffffff, 0x0020003f,
1033 0x38, 0xffffffff, 0x0140001c,
1034 0x3c, 0x000f0000, 0x000f0000,
1035 0x220, 0xffffffff, 0xC060000C,
1036 0x224, 0xc0000fff, 0x00000100,
1037 0xf90, 0xffffffff, 0x00000100,
1038 0xf98, 0x00000101, 0x00000000,
1039 0x20a8, 0xffffffff, 0x00000104,
1040 0x55e4, 0xff000fff, 0x00000100,
1041 0x30cc, 0xc0000fff, 0x00000104,
1042 0xc1e4, 0x00000001, 0x00000001,
1043 0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
static const u32 spectre_golden_registers[] =
{
1062 0x3c000, 0xffff1fff, 0x96940200,
1063 0x3c00c, 0xffff0001, 0xff000000,
1064 0x3c200, 0xfffc0fff, 0x00000100,
1065 0x6ed8, 0x00010101, 0x00010000,
1066 0x9834, 0xf00fffff, 0x00000400,
1067 0x9838, 0xfffffffc, 0x00020200,
1068 0x5bb0, 0x000000f0, 0x00000070,
1069 0x5bc0, 0xf0311fff, 0x80300000,
1070 0x98f8, 0x73773777, 0x12010001,
1071 0x9b7c, 0x00ff0000, 0x00fc0000,
1072 0x2f48, 0x73773777, 0x12010001,
1073 0x8a14, 0xf000003f, 0x00000007,
1074 0x8b24, 0xffffffff, 0x00ffffff,
1075 0x28350, 0x3f3f3fff, 0x00000082,
1076 0x28355, 0x0000003f, 0x00000000,
1077 0x3e78, 0x00000001, 0x00000002,
1078 0x913c, 0xffff03df, 0x00000004,
1079 0xc768, 0x00000008, 0x00000008,
1080 0x8c00, 0x000008ff, 0x00000800,
1081 0x9508, 0x00010000, 0x00010000,
1082 0xac0c, 0xffffffff, 0x54763210,
1083 0x214f8, 0x01ff01ff, 0x00000002,
1084 0x21498, 0x007ff800, 0x00200000,
1085 0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
static const u32 spectre_mgcg_cgcg_init[] =
{
1091 0xc420, 0xffffffff, 0xfffffffc,
1092 0x30800, 0xffffffff, 0xe0000000,
1093 0x3c2a0, 0xffffffff, 0x00000100,
1094 0x3c208, 0xffffffff, 0x00000100,
1095 0x3c2c0, 0xffffffff, 0x00000100,
1096 0x3c2c8, 0xffffffff, 0x00000100,
1097 0x3c2c4, 0xffffffff, 0x00000100,
1098 0x55e4, 0xffffffff, 0x00600100,
1099 0x3c280, 0xffffffff, 0x00000100,
1100 0x3c214, 0xffffffff, 0x06000100,
1101 0x3c220, 0xffffffff, 0x00000100,
1102 0x3c218, 0xffffffff, 0x06000100,
1103 0x3c204, 0xffffffff, 0x00000100,
1104 0x3c2e0, 0xffffffff, 0x00000100,
1105 0x3c224, 0xffffffff, 0x00000100,
1106 0x3c200, 0xffffffff, 0x00000100,
1107 0x3c230, 0xffffffff, 0x00000100,
1108 0x3c234, 0xffffffff, 0x00000100,
1109 0x3c250, 0xffffffff, 0x00000100,
1110 0x3c254, 0xffffffff, 0x00000100,
1111 0x3c258, 0xffffffff, 0x00000100,
1112 0x3c25c, 0xffffffff, 0x00000100,
1113 0x3c260, 0xffffffff, 0x00000100,
1114 0x3c27c, 0xffffffff, 0x00000100,
1115 0x3c278, 0xffffffff, 0x00000100,
1116 0x3c210, 0xffffffff, 0x06000100,
1117 0x3c290, 0xffffffff, 0x00000100,
1118 0x3c274, 0xffffffff, 0x00000100,
1119 0x3c2b4, 0xffffffff, 0x00000100,
1120 0x3c2b0, 0xffffffff, 0x00000100,
1121 0x3c270, 0xffffffff, 0x00000100,
1122 0x30800, 0xffffffff, 0xe0000000,
1123 0x3c020, 0xffffffff, 0x00010000,
1124 0x3c024, 0xffffffff, 0x00030002,
1125 0x3c028, 0xffffffff, 0x00040007,
1126 0x3c02c, 0xffffffff, 0x00060005,
1127 0x3c030, 0xffffffff, 0x00090008,
1128 0x3c034, 0xffffffff, 0x00010000,
1129 0x3c038, 0xffffffff, 0x00030002,
1130 0x3c03c, 0xffffffff, 0x00040007,
1131 0x3c040, 0xffffffff, 0x00060005,
1132 0x3c044, 0xffffffff, 0x00090008,
1133 0x3c048, 0xffffffff, 0x00010000,
1134 0x3c04c, 0xffffffff, 0x00030002,
1135 0x3c050, 0xffffffff, 0x00040007,
1136 0x3c054, 0xffffffff, 0x00060005,
1137 0x3c058, 0xffffffff, 0x00090008,
1138 0x3c05c, 0xffffffff, 0x00010000,
1139 0x3c060, 0xffffffff, 0x00030002,
1140 0x3c064, 0xffffffff, 0x00040007,
1141 0x3c068, 0xffffffff, 0x00060005,
1142 0x3c06c, 0xffffffff, 0x00090008,
1143 0x3c070, 0xffffffff, 0x00010000,
1144 0x3c074, 0xffffffff, 0x00030002,
1145 0x3c078, 0xffffffff, 0x00040007,
1146 0x3c07c, 0xffffffff, 0x00060005,
1147 0x3c080, 0xffffffff, 0x00090008,
1148 0x3c084, 0xffffffff, 0x00010000,
1149 0x3c088, 0xffffffff, 0x00030002,
1150 0x3c08c, 0xffffffff, 0x00040007,
1151 0x3c090, 0xffffffff, 0x00060005,
1152 0x3c094, 0xffffffff, 0x00090008,
1153 0x3c098, 0xffffffff, 0x00010000,
1154 0x3c09c, 0xffffffff, 0x00030002,
1155 0x3c0a0, 0xffffffff, 0x00040007,
1156 0x3c0a4, 0xffffffff, 0x00060005,
1157 0x3c0a8, 0xffffffff, 0x00090008,
1158 0x3c0ac, 0xffffffff, 0x00010000,
1159 0x3c0b0, 0xffffffff, 0x00030002,
1160 0x3c0b4, 0xffffffff, 0x00040007,
1161 0x3c0b8, 0xffffffff, 0x00060005,
1162 0x3c0bc, 0xffffffff, 0x00090008,
1163 0x3c000, 0xffffffff, 0x96e00200,
1164 0x8708, 0xffffffff, 0x00900100,
1165 0xc424, 0xffffffff, 0x0020003f,
1166 0x38, 0xffffffff, 0x0140001c,
1167 0x3c, 0x000f0000, 0x000f0000,
1168 0x220, 0xffffffff, 0xC060000C,
1169 0x224, 0xc0000fff, 0x00000100,
1170 0xf90, 0xffffffff, 0x00000100,
1171 0xf98, 0x00000101, 0x00000000,
1172 0x20a8, 0xffffffff, 0x00000104,
1173 0x55e4, 0xff000fff, 0x00000100,
1174 0x30cc, 0xc0000fff, 0x00000104,
1175 0xc1e4, 0x00000001, 0x00000001,
1176 0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
static const u32 kalindi_golden_registers[] =
{
1195 0x3c000, 0xffffdfff, 0x6e944040,
1196 0x55e4, 0xff607fff, 0xfc000100,
1197 0x3c220, 0xff000fff, 0x00000100,
1198 0x3c224, 0xff000fff, 0x00000100,
1199 0x3c200, 0xfffc0fff, 0x00000100,
1200 0x6ed8, 0x00010101, 0x00010000,
1201 0x9830, 0xffffffff, 0x00000000,
1202 0x9834, 0xf00fffff, 0x00000400,
1203 0x5bb0, 0x000000f0, 0x00000070,
1204 0x5bc0, 0xf0311fff, 0x80300000,
1205 0x98f8, 0x73773777, 0x12010001,
1206 0x98fc, 0xffffffff, 0x00000010,
1207 0x9b7c, 0x00ff0000, 0x00fc0000,
1208 0x8030, 0x00001f0f, 0x0000100a,
1209 0x2f48, 0x73773777, 0x12010001,
1210 0x2408, 0x000fffff, 0x000c007f,
1211 0x8a14, 0xf000003f, 0x00000007,
1212 0x8b24, 0x3fff3fff, 0x00ffcfff,
1213 0x30a04, 0x0000ff0f, 0x00000000,
1214 0x28a4c, 0x07ffffff, 0x06000000,
1215 0x4d8, 0x00000fff, 0x00000100,
1216 0x3e78, 0x00000001, 0x00000002,
1217 0xc768, 0x00000008, 0x00000008,
1218 0x8c00, 0x000000ff, 0x00000003,
1219 0x214f8, 0x01ff01ff, 0x00000002,
1220 0x21498, 0x007ff800, 0x00200000,
1221 0x2015c, 0xffffffff, 0x00000f40,
1222 0x88c4, 0x001f3ae3, 0x00000082,
1223 0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
static const u32 kalindi_mgcg_cgcg_init[] =
{
1229 0xc420, 0xffffffff, 0xfffffffc,
1230 0x30800, 0xffffffff, 0xe0000000,
1231 0x3c2a0, 0xffffffff, 0x00000100,
1232 0x3c208, 0xffffffff, 0x00000100,
1233 0x3c2c0, 0xffffffff, 0x00000100,
1234 0x3c2c8, 0xffffffff, 0x00000100,
1235 0x3c2c4, 0xffffffff, 0x00000100,
1236 0x55e4, 0xffffffff, 0x00600100,
1237 0x3c280, 0xffffffff, 0x00000100,
1238 0x3c214, 0xffffffff, 0x06000100,
1239 0x3c220, 0xffffffff, 0x00000100,
1240 0x3c218, 0xffffffff, 0x06000100,
1241 0x3c204, 0xffffffff, 0x00000100,
1242 0x3c2e0, 0xffffffff, 0x00000100,
1243 0x3c224, 0xffffffff, 0x00000100,
1244 0x3c200, 0xffffffff, 0x00000100,
1245 0x3c230, 0xffffffff, 0x00000100,
1246 0x3c234, 0xffffffff, 0x00000100,
1247 0x3c250, 0xffffffff, 0x00000100,
1248 0x3c254, 0xffffffff, 0x00000100,
1249 0x3c258, 0xffffffff, 0x00000100,
1250 0x3c25c, 0xffffffff, 0x00000100,
1251 0x3c260, 0xffffffff, 0x00000100,
1252 0x3c27c, 0xffffffff, 0x00000100,
1253 0x3c278, 0xffffffff, 0x00000100,
1254 0x3c210, 0xffffffff, 0x06000100,
1255 0x3c290, 0xffffffff, 0x00000100,
1256 0x3c274, 0xffffffff, 0x00000100,
1257 0x3c2b4, 0xffffffff, 0x00000100,
1258 0x3c2b0, 0xffffffff, 0x00000100,
1259 0x3c270, 0xffffffff, 0x00000100,
1260 0x30800, 0xffffffff, 0xe0000000,
1261 0x3c020, 0xffffffff, 0x00010000,
1262 0x3c024, 0xffffffff, 0x00030002,
1263 0x3c028, 0xffffffff, 0x00040007,
1264 0x3c02c, 0xffffffff, 0x00060005,
1265 0x3c030, 0xffffffff, 0x00090008,
1266 0x3c034, 0xffffffff, 0x00010000,
1267 0x3c038, 0xffffffff, 0x00030002,
1268 0x3c03c, 0xffffffff, 0x00040007,
1269 0x3c040, 0xffffffff, 0x00060005,
1270 0x3c044, 0xffffffff, 0x00090008,
1271 0x3c000, 0xffffffff, 0x96e00200,
1272 0x8708, 0xffffffff, 0x00900100,
1273 0xc424, 0xffffffff, 0x0020003f,
1274 0x38, 0xffffffff, 0x0140001c,
1275 0x3c, 0x000f0000, 0x000f0000,
1276 0x220, 0xffffffff, 0xC060000C,
1277 0x224, 0xc0000fff, 0x00000100,
1278 0x20a8, 0xffffffff, 0x00000104,
1279 0x55e4, 0xff000fff, 0x00000100,
1280 0x30cc, 0xc0000fff, 0x00000104,
1281 0xc1e4, 0x00000001, 0x00000001,
1282 0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}
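/*
 * Editor's note (assumption): cik_init_golden_registers() is normally run
 * once early during ASIC init/resume, so the tables above effectively act as
 * validated power-on defaults for each CIK variant.
 */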
/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}
/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
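/*
 * Usage sketch (editor's addition, hedged): ring code normally goes through
 * wrapper macros along the lines of RDOORBELL32()/WDOORBELL32() rather than
 * calling these helpers directly, e.g. kicking a compute or SDMA ring by
 * writing the new write pointer to the ring's doorbell offset:
 *
 *	WDOORBELL32(ring->doorbell_offset, ring->wptr);
 *
 * The exact macro and field names are assumptions here; the point is that
 * @offset is a byte offset into the doorbell BAR aperture.
 */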
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
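/*
 * Editor's note: each pair above is an {MC_SEQ_IO_DEBUG_INDEX,
 * MC_SEQ_IO_DEBUG_DATA} tuple; ci_mc_load_microcode() below programs these
 * into the memory controller before streaming the MC ucode in through
 * MC_SEQ_SUP_PGM.
 */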
/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active registers instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
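/*
 * Editor's note (assumption): because this changes which me/pipe/queue/VMID
 * instance the per-instance registers alias to chip-wide, callers are
 * expected to serialize around it (the driver appears to use a dedicated
 * srbm mutex for that) and to restore the 0/0/0/0 selection when done.
 */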
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}
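/*
 * Editor's note: the images requested above are looked up as
 * "radeon/<CHIP>_<block>.bin" in the firmware search path (typically
 * /lib/firmware); request_firmware() only pulls them into driver memory
 * here, and the per-block load/resume routines copy them into the hardware
 * later during startup.
 */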
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
1752 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1753 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1754 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1755 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1758 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1759 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1760 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1761 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1764 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1765 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1766 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1767 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1770 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1771 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1772 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1773 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1776 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1777 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1778 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1779 TILE_SPLIT(split_equal_to_row_size
));
1782 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1783 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
1786 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1787 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1788 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1789 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1792 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1793 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1794 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1795 TILE_SPLIT(split_equal_to_row_size
));
1798 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
1799 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
));
1802 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1803 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
1806 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1807 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1808 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1812 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1813 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1814 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1815 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1818 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1819 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
1820 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1821 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1824 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1825 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
1828 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1829 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1830 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1831 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1834 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1835 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1836 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1837 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1840 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1841 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
1842 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1843 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1846 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
1847 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
1850 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1851 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1852 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1853 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1856 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
1857 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1858 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16
) |
1859 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1862 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
1863 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
1864 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16
) |
1865 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
1871 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
1872 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1874 for (reg_offset
= 0; reg_offset
< num_secondary_tile_mode_states
; reg_offset
++) {
1875 switch (reg_offset
) {
1877 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1878 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1879 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1880 NUM_BANKS(ADDR_SURF_16_BANK
));
1883 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1884 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1885 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1886 NUM_BANKS(ADDR_SURF_16_BANK
));
1889 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1890 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1891 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1892 NUM_BANKS(ADDR_SURF_16_BANK
));
1895 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1896 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1897 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1898 NUM_BANKS(ADDR_SURF_16_BANK
));
1901 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1902 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1903 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1904 NUM_BANKS(ADDR_SURF_8_BANK
));
1907 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1908 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1909 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1910 NUM_BANKS(ADDR_SURF_4_BANK
));
1913 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1914 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1915 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1916 NUM_BANKS(ADDR_SURF_2_BANK
));
1919 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1920 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8
) |
1921 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1922 NUM_BANKS(ADDR_SURF_16_BANK
));
1925 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1926 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4
) |
1927 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4
) |
1928 NUM_BANKS(ADDR_SURF_16_BANK
));
1931 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1932 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2
) |
1933 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1934 NUM_BANKS(ADDR_SURF_16_BANK
));
1937 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1938 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1939 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2
) |
1940 NUM_BANKS(ADDR_SURF_16_BANK
));
1943 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1944 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1945 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1946 NUM_BANKS(ADDR_SURF_8_BANK
));
1949 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1950 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1951 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1952 NUM_BANKS(ADDR_SURF_4_BANK
));
1955 gb_tile_moden
= (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1
) |
1956 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1
) |
1957 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1
) |
1958 NUM_BANKS(ADDR_SURF_2_BANK
));
1964 WREG32(GB_MACROTILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
1966 } else if (num_pipe_configs
== 4) {
1968 for (reg_offset
= 0; reg_offset
< num_tile_mode_states
; reg_offset
++) {
1969 switch (reg_offset
) {
1971 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1972 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1973 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1974 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B
));
1977 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1979 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B
));
1983 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1985 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1986 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
1989 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1991 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B
));
1995 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
1996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
1997 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
1998 TILE_SPLIT(split_equal_to_row_size
));
2001 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
));
2005 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
2007 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B
));
2011 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING
) |
2013 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2014 TILE_SPLIT(split_equal_to_row_size
));
2017 gb_tile_moden
= (ARRAY_MODE(ARRAY_LINEAR_ALIGNED
) |
2018 PIPE_CONFIG(ADDR_SURF_P4_16x16
));
2021 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
));
2025 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2031 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
2033 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
2034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2037 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2038 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING
) |
2039 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2043 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
));
2047 gb_tile_moden
= (ARRAY_MODE(ARRAY_2D_TILED_THIN1
) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
2049 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2053 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
2055 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
2056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2059 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING
) |
2061 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2065 gb_tile_moden
= (ARRAY_MODE(ARRAY_1D_TILED_THIN1
) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
));
2069 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
2071 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2075 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_TILED_THIN1
) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
2077 PIPE_CONFIG(ADDR_SURF_P4_8x16
) |
2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2081 gb_tile_moden
= (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1
) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING
) |
2083 PIPE_CONFIG(ADDR_SURF_P4_16x16
) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2
));
2090 rdev
->config
.cik
.tile_mode_array
[reg_offset
] = gb_tile_moden
;
2091 WREG32(GB_TILE_MODE0
+ (reg_offset
* 4), gb_tile_moden
);
	} else if (num_rbs < 4) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(split_equal_to_row_size));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | TILE_SPLIT(split_equal_to_row_size));
			gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) | PIPE_CONFIG(ADDR_SURF_P4_8x16));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P4_8x16) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | NUM_BANKS(ADDR_SURF_4_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) | NUM_BANKS(ADDR_SURF_4_BANK));
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(split_equal_to_row_size));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | TILE_SPLIT(split_equal_to_row_size));
			gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) | MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) | PIPE_CONFIG(ADDR_SURF_P2) | SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) | NUM_BANKS(ADDR_SURF_16_BANK));
			gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) | BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) | MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) | NUM_BANKS(ADDR_SURF_8_BANK));
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}
/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address. Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
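
/* For reference: the mask is simply bit_width consecutive low bits set,
 * e.g. cik_create_bitmask(4) == 0xf, cik_create_bitmask(1) == 0x1.
 */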
/**
 * cik_get_rb_disabled - compute the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	data &= BACKEND_DISABLE_MASK;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}
	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}
	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 1:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
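
	/* Rough decode sketch for consumers of tile_config (the field layout
	 * follows the comment above; the helper names below are illustrative,
	 * not part of this driver):
	 *   num_pipes  = 1 << (tile_config & 0xf);         bits 3:0  (log2 encoded)
	 *   bank_field = (tile_config >> 4) & 0xf;         bits 7:4
	 *   group_size = (tile_config >> 8) & 0xf;         bits 11:8
	 *   row_size   = (tile_config >> 12) & 0xf;        bits 15:12
	 * Userspace typically obtains this dword through the radeon info ioctl
	 * rather than reading GB_ADDR_CONFIG directly.
	 */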
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}
/*
 * GPU scratch registers helper functions.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
/**
 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;

		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw |
		   (ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
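
/* Shape of the IB packet emitted above, for reference:
 *   dword 0: PACKET3(INDIRECT_BUFFER or INDIRECT_BUFFER_CONST, 2)
 *   dword 1: IB GPU address, low bits (dword aligned)
 *   dword 2: IB GPU address, high 16 bits
 *   dword 3: control = VALID | length in dwords | (vm id << 24)
 */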
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
3166 * On CIK, gfx and compute now have independant command processors.
3169 * Gfx consists of a single ring and can process both gfx jobs and
3170 * compute jobs. The gfx CP consists of three microengines (ME):
3171 * PFP - Pre-Fetch Parser
3173 * CE - Constant Engine
3174 * The PFP and ME make up what is considered the Drawing Engine (DE).
3175 * The CE is an asynchronous engine used for updating buffer desciptors
3176 * used by the DE so that they can be loaded into cache in parallel
3177 * while the DE is processing state update packets.
3180 * The compute CP consists of two microengines (ME):
3181 * MEC1 - Compute MicroEngine 1
3182 * MEC2 - Compute MicroEngine 2
3183 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3184 * The queues are exposed to userspace and are programmed directly
3185 * by the compute runtime.
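
/* In this driver the gfx CP ring is tracked as
 * rdev->ring[RADEON_RING_TYPE_GFX_INDEX], and the two kernel-managed compute
 * queues reuse the CAYMAN_RING_TYPE_CP1_INDEX / CAYMAN_RING_TYPE_CP2_INDEX
 * slots (see cik_cp_compute_fini() and cik_cp_compute_resume() below).
 */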
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
}
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info.
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}
	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

	return rptr;
}

u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}
	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

	return wptr;
}

void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;

	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
	WDOORBELL32(ring->doorbell_offset, wptr);
}
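
/* Informal note on the write pointer path: the new wptr is first mirrored
 * into the writeback page, then the queue's doorbell is rung with
 * WDOORBELL32() so the compute CP fetches the updated pointer on its own,
 * without the driver having to touch CP_HQD_PQ_WPTR through SRBM each time.
 */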
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
}
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info.
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
#define MEC_HPD_SIZE 2048
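
/* Sizing note (derived from cik_mec_init() below): each compute pipe gets a
 * MEC_HPD_SIZE * 2 byte chunk of the HPD EOP buffer, so the whole buffer is
 * num_mec * num_pipe * MEC_HPD_SIZE * 2 bytes, e.g. roughly 16 KB for a
 * single-MEC part with 4 pipes.
 */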
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 static_thread_mgmt23[2];
	u32 thread_trace_enable;
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 interrupt_queue[64];
};
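
/* Background note: the MQD (memory queue descriptor) is a GTT-resident
 * mirror of a compute queue's HQD register state.  The driver fills it in and
 * writes the same values to the CP_HQD_* registers while the queue's
 * me/pipe/queue is selected through SRBM (see cik_cp_compute_resume()).
 * Keeping the descriptor in memory also allows the hardware/firmware to save
 * and restore queue state from it.
 */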
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= drm_order(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* doorbell offset */
		rdev->ring[idx].doorbell_offset =
			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (i = 0; i < rdev->usec_timeout; i++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
		mqd->queue_state.cp_hqd_pq_control |=
			drm_order(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);
		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}

static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}

static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}

static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset all cp blocks */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	return 0;
}
/*
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
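
/* In this driver the SDMA0 gfx ring is tracked as
 * rdev->ring[R600_RING_TYPE_DMA_INDEX] and the SDMA1 gfx ring as
 * rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]; the per-engine register banks are
 * reached by adding SDMA0_REGISTER_OFFSET or SDMA1_REGISTER_OFFSET to the
 * SDMA0_* register names (see the resume/stop helpers below).
 */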
/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on a 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);
}
/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}
/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}
/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}
/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}
/**
 * cik_sdma_enable - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}
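/* SDMA_HALT acts as a halt bit in SDMA0_ME_CNTL: clearing it lets the micro
 * engine run, setting it stops it. Both instances use the same register
 * layout, only the per-instance offset differs.
 */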
/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
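/* The ring size is programmed as a log2 value. Illustrative numbers only: a
 * 256 KiB ring holds 65536 DWs, drm_order(65536) == 16, so rb_cntl starts out
 * as (16 << 1) before the write-back and swap enable bits are ORed in and
 * SDMA_RB_ENABLE is finally set.
 */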
/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}
/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}
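/* Both SDMA instances are loaded with the same firmware image; the ucode
 * version word is written as the final UCODE_DATA entry and UCODE_ADDR is then
 * reset to 0, presumably so each engine starts fetching from the top of its
 * ucode.
 */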
/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}
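/* Bring-up order matters here: the engines are soft-reset through SRBM first,
 * the microcode load halts them and programs both instances, and only then are
 * the MEs unhalted and the gfx/compute queues configured and tested.
 */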
/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}
/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
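/* Worked example with illustrative sizes: each COPY_LINEAR packet moves at
 * most 0x1fffff bytes, so an 8 MiB move needs
 * DIV_ROUND_UP(0x800000, 0x1fffff) = 5 packets of 7 DWs each; that is why the
 * ring is locked for num_loops * 7 + 14 DWs, the extra 14 presumably covering
 * the optional semaphore sync and the fence/trap packets.
 */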
/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a
 * value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}
/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n", RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n", RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n", RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n", RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n", RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n", RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n", RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n", RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n", RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n", RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n", RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n", RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n", RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n", RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
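/* The returned mask is consumed by cik_gpu_soft_reset() below and by the
 * per-ring lockup checks: a block is only reset (or a ring declared hung) if
 * its busy bit is set here, so an idle GPU yields a mask of 0 and the reset
 * path becomes a no-op.
 */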
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
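/* The reset sequence above is: read the soft-reset register, OR in the
 * requested bits, write and read back, wait ~50us, then clear the bits and
 * read back again; the read-backs likely act as posting reads so the reset
 * pulse is actually seen by the hardware before the MC is resumed.
 */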
/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}

/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}
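/* Both lockup checks follow the same pattern: if the relevant engine does not
 * report busy in the soft-reset mask, the lockup tracker is refreshed and the
 * ring is treated as healthy; otherwise work is forced onto the ring and
 * radeon_ring_test_lockup() decides whether it is still making progress.
 */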
4897 * cik_mc_program - program the GPU memory controller
4899 * @rdev: radeon_device pointer
4901 * Set the location of vram, gart, and AGP in the GPU's
4902 * physical address space (CIK).
4904 static void cik_mc_program(struct radeon_device
*rdev
)
4906 struct evergreen_mc_save save
;
4910 /* Initialize HDP */
4911 for (i
= 0, j
= 0; i
< 32; i
++, j
+= 0x18) {
4912 WREG32((0x2c14 + j
), 0x00000000);
4913 WREG32((0x2c18 + j
), 0x00000000);
4914 WREG32((0x2c1c + j
), 0x00000000);
4915 WREG32((0x2c20 + j
), 0x00000000);
4916 WREG32((0x2c24 + j
), 0x00000000);
4918 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL
, 0);
4920 evergreen_mc_stop(rdev
, &save
);
4921 if (radeon_mc_wait_for_idle(rdev
)) {
4922 dev_warn(rdev
->dev
, "Wait for MC idle timedout !\n");
4924 /* Lockout access through VGA aperture*/
4925 WREG32(VGA_HDP_CONTROL
, VGA_MEMORY_DISABLE
);
4926 /* Update configuration */
4927 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR
,
4928 rdev
->mc
.vram_start
>> 12);
4929 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR
,
4930 rdev
->mc
.vram_end
>> 12);
4931 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR
,
4932 rdev
->vram_scratch
.gpu_addr
>> 12);
4933 tmp
= ((rdev
->mc
.vram_end
>> 24) & 0xFFFF) << 16;
4934 tmp
|= ((rdev
->mc
.vram_start
>> 24) & 0xFFFF);
4935 WREG32(MC_VM_FB_LOCATION
, tmp
);
4936 /* XXX double check these! */
4937 WREG32(HDP_NONSURFACE_BASE
, (rdev
->mc
.vram_start
>> 8));
4938 WREG32(HDP_NONSURFACE_INFO
, (2 << 7) | (1 << 30));
4939 WREG32(HDP_NONSURFACE_SIZE
, 0x3FFFFFFF);
4940 WREG32(MC_VM_AGP_BASE
, 0);
4941 WREG32(MC_VM_AGP_TOP
, 0x0FFFFFFF);
4942 WREG32(MC_VM_AGP_BOT
, 0x0FFFFFFF);
4943 if (radeon_mc_wait_for_idle(rdev
)) {
4944 dev_warn(rdev
->dev
, "Wait for MC idle timedout !\n");
4946 evergreen_mc_resume(rdev
, &save
);
4947 /* we need to own VRAM, so turn off the VGA renderer here
4948 * to stop it overwriting our objects */
4949 rv515_vga_render_disable(rdev
);
4953 * cik_mc_init - initialize the memory controller driver params
4955 * @rdev: radeon_device pointer
4957 * Look up the amount of vram, vram width, and decide how to place
4958 * vram and gart within the GPU's physical address space (CIK).
4959 * Returns 0 for success.
4961 static int cik_mc_init(struct radeon_device
*rdev
)
4964 int chansize
, numchan
;
4966 /* Get VRAM informations */
4967 rdev
->mc
.vram_is_ddr
= true;
4968 tmp
= RREG32(MC_ARB_RAMCFG
);
4969 if (tmp
& CHANSIZE_MASK
) {
4974 tmp
= RREG32(MC_SHARED_CHMAP
);
4975 switch ((tmp
& NOOFCHAN_MASK
) >> NOOFCHAN_SHIFT
) {
5005 rdev
->mc
.vram_width
= numchan
* chansize
;
5006 /* Could aper size report 0 ? */
5007 rdev
->mc
.aper_base
= pci_resource_start(rdev
->pdev
, 0);
5008 rdev
->mc
.aper_size
= pci_resource_len(rdev
->pdev
, 0);
5009 /* size in MB on si */
5010 rdev
->mc
.mc_vram_size
= RREG32(CONFIG_MEMSIZE
) * 1024 * 1024;
5011 rdev
->mc
.real_vram_size
= RREG32(CONFIG_MEMSIZE
) * 1024 * 1024;
5012 rdev
->mc
.visible_vram_size
= rdev
->mc
.aper_size
;
5013 si_vram_gtt_location(rdev
, &rdev
->mc
);
5014 radeon_update_bandwidth_info(rdev
);
5021 * VMID 0 is the physical GPU addresses as used by the kernel.
5022 * VMIDs 1-15 are used for userspace clients and are handled
5023 * by the radeon vm/hsa code.
5026 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5028 * @rdev: radeon_device pointer
5030 * Flush the TLB for the VMID 0 page table (CIK).
5032 void cik_pcie_gart_tlb_flush(struct radeon_device
*rdev
)
5034 /* flush hdp cache */
5035 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL
, 0);
5037 /* bits 0-15 are the VM contexts0-15 */
5038 WREG32(VM_INVALIDATE_REQUEST
, 0x1);
5042 * cik_pcie_gart_enable - gart enable
5044 * @rdev: radeon_device pointer
5046 * This sets up the TLBs, programs the page tables for VMID0,
5047 * sets up the hw for VMIDs 1-15 which are allocated on
5048 * demand, and sets up the global locations for the LDS, GDS,
5049 * and GPUVM for FSA64 clients (CIK).
5050 * Returns 0 for success, errors for failure.
5052 static int cik_pcie_gart_enable(struct radeon_device
*rdev
)
5056 if (rdev
->gart
.robj
== NULL
) {
5057 dev_err(rdev
->dev
, "No VRAM object for PCIE GART.\n");
5060 r
= radeon_gart_table_vram_pin(rdev
);
5063 radeon_gart_restore(rdev
);
5064 /* Setup TLB control */
5065 WREG32(MC_VM_MX_L1_TLB_CNTL
,
5068 SYSTEM_ACCESS_MODE_NOT_IN_SYS
|
5069 ENABLE_ADVANCED_DRIVER_MODEL
|
5070 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU
);
5071 /* Setup L2 cache */
5072 WREG32(VM_L2_CNTL
, ENABLE_L2_CACHE
|
5073 ENABLE_L2_FRAGMENT_PROCESSING
|
5074 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE
|
5075 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE
|
5076 EFFECTIVE_L2_QUEUE_SIZE(7) |
5077 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5078 WREG32(VM_L2_CNTL2
, INVALIDATE_ALL_L1_TLBS
| INVALIDATE_L2_CACHE
);
5079 WREG32(VM_L2_CNTL3
, L2_CACHE_BIGK_ASSOCIATIVITY
|
5080 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5081 /* setup context0 */
5082 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR
, rdev
->mc
.gtt_start
>> 12);
5083 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR
, rdev
->mc
.gtt_end
>> 12);
5084 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
, rdev
->gart
.table_addr
>> 12);
5085 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR
,
5086 (u32
)(rdev
->dummy_page
.addr
>> 12));
5087 WREG32(VM_CONTEXT0_CNTL2
, 0);
5088 WREG32(VM_CONTEXT0_CNTL
, (ENABLE_CONTEXT
| PAGE_TABLE_DEPTH(0) |
5089 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT
));
5095 /* empty context1-15 */
5096 /* FIXME start with 4G, once using 2 level pt switch to full
5099 /* set vm size, must be a multiple of 4 */
5100 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR
, 0);
5101 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR
, rdev
->vm_manager
.max_pfn
);
5102 for (i
= 1; i
< 16; i
++) {
5104 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (i
<< 2),
5105 rdev
->gart
.table_addr
>> 12);
5107 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((i
- 8) << 2),
5108 rdev
->gart
.table_addr
>> 12);
5111 /* enable context1-15 */
5112 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR
,
5113 (u32
)(rdev
->dummy_page
.addr
>> 12));
5114 WREG32(VM_CONTEXT1_CNTL2
, 4);
5115 WREG32(VM_CONTEXT1_CNTL
, ENABLE_CONTEXT
| PAGE_TABLE_DEPTH(1) |
5116 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT
|
5117 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT
|
5118 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT
|
5119 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT
|
5120 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT
|
5121 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT
|
5122 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT
|
5123 VALID_PROTECTION_FAULT_ENABLE_DEFAULT
|
5124 READ_PROTECTION_FAULT_ENABLE_INTERRUPT
|
5125 READ_PROTECTION_FAULT_ENABLE_DEFAULT
|
5126 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT
|
5127 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT
);
5129 /* TC cache setup ??? */
5130 WREG32(TC_CFG_L1_LOAD_POLICY0
, 0);
5131 WREG32(TC_CFG_L1_LOAD_POLICY1
, 0);
5132 WREG32(TC_CFG_L1_STORE_POLICY
, 0);
5134 WREG32(TC_CFG_L2_LOAD_POLICY0
, 0);
5135 WREG32(TC_CFG_L2_LOAD_POLICY1
, 0);
5136 WREG32(TC_CFG_L2_STORE_POLICY0
, 0);
5137 WREG32(TC_CFG_L2_STORE_POLICY1
, 0);
5138 WREG32(TC_CFG_L2_ATOMIC_POLICY
, 0);
5140 WREG32(TC_CFG_L1_VOLATILE
, 0);
5141 WREG32(TC_CFG_L2_VOLATILE
, 0);
5143 if (rdev
->family
== CHIP_KAVERI
) {
5144 u32 tmp
= RREG32(CHUB_CONTROL
);
5146 WREG32(CHUB_CONTROL
, tmp
);
5149 /* XXX SH_MEM regs */
5150 /* where to put LDS, scratch, GPUVM in FSA64 space */
5151 mutex_lock(&rdev
->srbm_mutex
);
5152 for (i
= 0; i
< 16; i
++) {
5153 cik_srbm_select(rdev
, 0, 0, 0, i
);
5154 /* CP and shaders */
5155 WREG32(SH_MEM_CONFIG
, 0);
5156 WREG32(SH_MEM_APE1_BASE
, 1);
5157 WREG32(SH_MEM_APE1_LIMIT
, 0);
5158 WREG32(SH_MEM_BASES
, 0);
5160 WREG32(SDMA0_GFX_VIRTUAL_ADDR
+ SDMA0_REGISTER_OFFSET
, 0);
5161 WREG32(SDMA0_GFX_APE1_CNTL
+ SDMA0_REGISTER_OFFSET
, 0);
5162 WREG32(SDMA0_GFX_VIRTUAL_ADDR
+ SDMA1_REGISTER_OFFSET
, 0);
5163 WREG32(SDMA0_GFX_APE1_CNTL
+ SDMA1_REGISTER_OFFSET
, 0);
5164 /* XXX SDMA RLC - todo */
5166 cik_srbm_select(rdev
, 0, 0, 0, 0);
5167 mutex_unlock(&rdev
->srbm_mutex
);
5169 cik_pcie_gart_tlb_flush(rdev
);
5170 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5171 (unsigned)(rdev
->mc
.gtt_size
>> 20),
5172 (unsigned long long)rdev
->gart
.table_addr
);
5173 rdev
->gart
.ready
= true;
5178 * cik_pcie_gart_disable - gart disable
5180 * @rdev: radeon_device pointer
5182 * This disables all VM page table (CIK).
5184 static void cik_pcie_gart_disable(struct radeon_device
*rdev
)
5186 /* Disable all tables */
5187 WREG32(VM_CONTEXT0_CNTL
, 0);
5188 WREG32(VM_CONTEXT1_CNTL
, 0);
5189 /* Setup TLB control */
5190 WREG32(MC_VM_MX_L1_TLB_CNTL
, SYSTEM_ACCESS_MODE_NOT_IN_SYS
|
5191 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU
);
5192 /* Setup L2 cache */
5194 ENABLE_L2_FRAGMENT_PROCESSING
|
5195 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE
|
5196 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE
|
5197 EFFECTIVE_L2_QUEUE_SIZE(7) |
5198 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5199 WREG32(VM_L2_CNTL2
, 0);
5200 WREG32(VM_L2_CNTL3
, L2_CACHE_BIGK_ASSOCIATIVITY
|
5201 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5202 radeon_gart_table_vram_unpin(rdev
);
5206 * cik_pcie_gart_fini - vm fini callback
5208 * @rdev: radeon_device pointer
5210 * Tears down the driver GART/VM setup (CIK).
5212 static void cik_pcie_gart_fini(struct radeon_device
*rdev
)
5214 cik_pcie_gart_disable(rdev
);
5215 radeon_gart_table_vram_free(rdev
);
5216 radeon_gart_fini(rdev
);
5221 * cik_ib_parse - vm ib_parse callback
5223 * @rdev: radeon_device pointer
5224 * @ib: indirect buffer pointer
5226 * CIK uses hw IB checking so this is a nop (CIK).
5228 int cik_ib_parse(struct radeon_device
*rdev
, struct radeon_ib
*ib
)
5235 * VMID 0 is the physical GPU addresses as used by the kernel.
5236 * VMIDs 1-15 are used for userspace clients and are handled
5237 * by the radeon vm/hsa code.
5240 * cik_vm_init - cik vm init callback
5242 * @rdev: radeon_device pointer
5244 * Inits cik specific vm parameters (number of VMs, base of vram for
5245 * VMIDs 1-15) (CIK).
5246 * Returns 0 for success.
5248 int cik_vm_init(struct radeon_device
*rdev
)
5251 rdev
->vm_manager
.nvm
= 16;
5252 /* base offset of vram pages */
5253 if (rdev
->flags
& RADEON_IS_IGP
) {
5254 u64 tmp
= RREG32(MC_VM_FB_OFFSET
);
5256 rdev
->vm_manager
.vram_base_offset
= tmp
;
5258 rdev
->vm_manager
.vram_base_offset
= 0;
5264 * cik_vm_fini - cik vm fini callback
5266 * @rdev: radeon_device pointer
5268 * Tear down any asic specific VM setup (CIK).
5270 void cik_vm_fini(struct radeon_device
*rdev
)
5275 * cik_vm_decode_fault - print human readable fault info
5277 * @rdev: radeon_device pointer
5278 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5279 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5281 * Print human readable fault information (CIK).
5283 static void cik_vm_decode_fault(struct radeon_device
*rdev
,
5284 u32 status
, u32 addr
, u32 mc_client
)
5286 u32 mc_id
= (status
& MEMORY_CLIENT_ID_MASK
) >> MEMORY_CLIENT_ID_SHIFT
;
5287 u32 vmid
= (status
& FAULT_VMID_MASK
) >> FAULT_VMID_SHIFT
;
5288 u32 protections
= (status
& PROTECTIONS_MASK
) >> PROTECTIONS_SHIFT
;
5289 char *block
= (char *)&mc_client
;
5291 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5292 protections
, vmid
, addr
,
5293 (status
& MEMORY_CLIENT_RW_MASK
) ? "write" : "read",
5298 * cik_vm_flush - cik vm flush using the CP
5300 * @rdev: radeon_device pointer
5302 * Update the page table base and flush the VM TLB
5303 * using the CP (CIK).
5305 void cik_vm_flush(struct radeon_device
*rdev
, int ridx
, struct radeon_vm
*vm
)
5307 struct radeon_ring
*ring
= &rdev
->ring
[ridx
];
5312 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
5313 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
5314 WRITE_DATA_DST_SEL(0)));
5316 radeon_ring_write(ring
,
5317 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm
->id
<< 2)) >> 2);
5319 radeon_ring_write(ring
,
5320 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((vm
->id
- 8) << 2)) >> 2);
5322 radeon_ring_write(ring
, 0);
5323 radeon_ring_write(ring
, vm
->pd_gpu_addr
>> 12);
5325 /* update SH_MEM_* regs */
5326 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
5327 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
5328 WRITE_DATA_DST_SEL(0)));
5329 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
5330 radeon_ring_write(ring
, 0);
5331 radeon_ring_write(ring
, VMID(vm
->id
));
5333 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 6));
5334 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
5335 WRITE_DATA_DST_SEL(0)));
5336 radeon_ring_write(ring
, SH_MEM_BASES
>> 2);
5337 radeon_ring_write(ring
, 0);
5339 radeon_ring_write(ring
, 0); /* SH_MEM_BASES */
5340 radeon_ring_write(ring
, 0); /* SH_MEM_CONFIG */
5341 radeon_ring_write(ring
, 1); /* SH_MEM_APE1_BASE */
5342 radeon_ring_write(ring
, 0); /* SH_MEM_APE1_LIMIT */
5344 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
5345 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
5346 WRITE_DATA_DST_SEL(0)));
5347 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
5348 radeon_ring_write(ring
, 0);
5349 radeon_ring_write(ring
, VMID(0));
5352 /* We should be using the WAIT_REG_MEM packet here like in
5353 * cik_fence_ring_emit(), but it causes the CP to hang in this
5356 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
5357 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
5358 WRITE_DATA_DST_SEL(0)));
5359 radeon_ring_write(ring
, HDP_MEM_COHERENCY_FLUSH_CNTL
>> 2);
5360 radeon_ring_write(ring
, 0);
5361 radeon_ring_write(ring
, 0);
5363 /* bits 0-15 are the VM contexts0-15 */
5364 radeon_ring_write(ring
, PACKET3(PACKET3_WRITE_DATA
, 3));
5365 radeon_ring_write(ring
, (WRITE_DATA_ENGINE_SEL(0) |
5366 WRITE_DATA_DST_SEL(0)));
5367 radeon_ring_write(ring
, VM_INVALIDATE_REQUEST
>> 2);
5368 radeon_ring_write(ring
, 0);
5369 radeon_ring_write(ring
, 1 << vm
->id
);
5371 /* compute doesn't have PFP */
5372 if (ridx
== RADEON_RING_TYPE_GFX_INDEX
) {
5373 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5374 radeon_ring_write(ring
, PACKET3(PACKET3_PFP_SYNC_ME
, 0));
5375 radeon_ring_write(ring
, 0x0);
5380 * cik_vm_set_page - update the page tables using sDMA
5382 * @rdev: radeon_device pointer
5383 * @ib: indirect buffer to fill with commands
5384 * @pe: addr of the page entry
5385 * @addr: dst addr to write into pe
5386 * @count: number of page entries to update
5387 * @incr: increase next addr by incr bytes
5388 * @flags: access flags
5390 * Update the page tables using CP or sDMA (CIK).
5392 void cik_vm_set_page(struct radeon_device
*rdev
,
5393 struct radeon_ib
*ib
,
5395 uint64_t addr
, unsigned count
,
5396 uint32_t incr
, uint32_t flags
)
5398 uint32_t r600_flags
= cayman_vm_page_flags(rdev
, flags
);
5402 if (rdev
->asic
->vm
.pt_ring_index
== RADEON_RING_TYPE_GFX_INDEX
) {
5405 ndw
= 2 + count
* 2;
5409 ib
->ptr
[ib
->length_dw
++] = PACKET3(PACKET3_WRITE_DATA
, ndw
);
5410 ib
->ptr
[ib
->length_dw
++] = (WRITE_DATA_ENGINE_SEL(0) |
5411 WRITE_DATA_DST_SEL(1));
5412 ib
->ptr
[ib
->length_dw
++] = pe
;
5413 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
);
5414 for (; ndw
> 2; ndw
-= 2, --count
, pe
+= 8) {
5415 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
5416 value
= radeon_vm_map_gart(rdev
, addr
);
5417 value
&= 0xFFFFFFFFFFFFF000ULL
;
5418 } else if (flags
& RADEON_VM_PAGE_VALID
) {
5424 value
|= r600_flags
;
5425 ib
->ptr
[ib
->length_dw
++] = value
;
5426 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
5431 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
5437 /* for non-physically contiguous pages (system) */
5438 ib
->ptr
[ib
->length_dw
++] = SDMA_PACKET(SDMA_OPCODE_WRITE
, SDMA_WRITE_SUB_OPCODE_LINEAR
, 0);
5439 ib
->ptr
[ib
->length_dw
++] = pe
;
5440 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
);
5441 ib
->ptr
[ib
->length_dw
++] = ndw
;
5442 for (; ndw
> 0; ndw
-= 2, --count
, pe
+= 8) {
5443 if (flags
& RADEON_VM_PAGE_SYSTEM
) {
5444 value
= radeon_vm_map_gart(rdev
, addr
);
5445 value
&= 0xFFFFFFFFFFFFF000ULL
;
5446 } else if (flags
& RADEON_VM_PAGE_VALID
) {
5452 value
|= r600_flags
;
5453 ib
->ptr
[ib
->length_dw
++] = value
;
5454 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
5463 if (flags
& RADEON_VM_PAGE_VALID
)
5467 /* for physically contiguous pages (vram) */
5468 ib
->ptr
[ib
->length_dw
++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE
, 0, 0);
5469 ib
->ptr
[ib
->length_dw
++] = pe
; /* dst addr */
5470 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(pe
);
5471 ib
->ptr
[ib
->length_dw
++] = r600_flags
; /* mask */
5472 ib
->ptr
[ib
->length_dw
++] = 0;
5473 ib
->ptr
[ib
->length_dw
++] = value
; /* value */
5474 ib
->ptr
[ib
->length_dw
++] = upper_32_bits(value
);
5475 ib
->ptr
[ib
->length_dw
++] = incr
; /* increment size */
5476 ib
->ptr
[ib
->length_dw
++] = 0;
5477 ib
->ptr
[ib
->length_dw
++] = ndw
; /* number of entries */
5483 while (ib
->length_dw
& 0x7)
5484 ib
->ptr
[ib
->length_dw
++] = SDMA_PACKET(SDMA_OPCODE_NOP
, 0, 0);
5489 * cik_dma_vm_flush - cik vm flush using sDMA
5491 * @rdev: radeon_device pointer
5493 * Update the page table base and flush the VM TLB
5496 void cik_dma_vm_flush(struct radeon_device
*rdev
, int ridx
, struct radeon_vm
*vm
)
5498 struct radeon_ring
*ring
= &rdev
->ring
[ridx
];
5499 u32 extra_bits
= (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5500 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5506 if (ridx
== R600_RING_TYPE_DMA_INDEX
)
5507 ref_and_mask
= SDMA0
;
5509 ref_and_mask
= SDMA1
;
5511 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5513 radeon_ring_write(ring
, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR
+ (vm
->id
<< 2)) >> 2);
5515 radeon_ring_write(ring
, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR
+ ((vm
->id
- 8) << 2)) >> 2);
5517 radeon_ring_write(ring
, vm
->pd_gpu_addr
>> 12);
5519 /* update SH_MEM_* regs */
5520 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5521 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
5522 radeon_ring_write(ring
, VMID(vm
->id
));
5524 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5525 radeon_ring_write(ring
, SH_MEM_BASES
>> 2);
5526 radeon_ring_write(ring
, 0);
5528 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5529 radeon_ring_write(ring
, SH_MEM_CONFIG
>> 2);
5530 radeon_ring_write(ring
, 0);
5532 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5533 radeon_ring_write(ring
, SH_MEM_APE1_BASE
>> 2);
5534 radeon_ring_write(ring
, 1);
5536 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5537 radeon_ring_write(ring
, SH_MEM_APE1_LIMIT
>> 2);
5538 radeon_ring_write(ring
, 0);
5540 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5541 radeon_ring_write(ring
, SRBM_GFX_CNTL
>> 2);
5542 radeon_ring_write(ring
, VMID(0));
5545 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM
, 0, extra_bits
));
5546 radeon_ring_write(ring
, GPU_HDP_FLUSH_DONE
);
5547 radeon_ring_write(ring
, GPU_HDP_FLUSH_REQ
);
5548 radeon_ring_write(ring
, ref_and_mask
); /* REFERENCE */
5549 radeon_ring_write(ring
, ref_and_mask
); /* MASK */
5550 radeon_ring_write(ring
, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5553 radeon_ring_write(ring
, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE
, 0, 0xf000));
5554 radeon_ring_write(ring
, VM_INVALIDATE_REQUEST
>> 2);
5555 radeon_ring_write(ring
, 1 << vm
->id
);
5560 * The RLC is a multi-purpose microengine that handles a
5561 * variety of functions, the most important of which is
5562 * the interrupt controller.
5564 static void cik_enable_gui_idle_interrupt(struct radeon_device
*rdev
,
5567 u32 tmp
= RREG32(CP_INT_CNTL_RING0
);
5570 tmp
|= (CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
);
5572 tmp
&= ~(CNTX_BUSY_INT_ENABLE
| CNTX_EMPTY_INT_ENABLE
);
5573 WREG32(CP_INT_CNTL_RING0
, tmp
);
5576 static void cik_enable_lbpw(struct radeon_device
*rdev
, bool enable
)
5580 tmp
= RREG32(RLC_LB_CNTL
);
5582 tmp
|= LOAD_BALANCE_ENABLE
;
5584 tmp
&= ~LOAD_BALANCE_ENABLE
;
5585 WREG32(RLC_LB_CNTL
, tmp
);
5588 static void cik_wait_for_rlc_serdes(struct radeon_device
*rdev
)
5593 for (i
= 0; i
< rdev
->config
.cik
.max_shader_engines
; i
++) {
5594 for (j
= 0; j
< rdev
->config
.cik
.max_sh_per_se
; j
++) {
5595 cik_select_se_sh(rdev
, i
, j
);
5596 for (k
= 0; k
< rdev
->usec_timeout
; k
++) {
5597 if (RREG32(RLC_SERDES_CU_MASTER_BUSY
) == 0)
5603 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5605 mask
= SE_MASTER_BUSY_MASK
| GC_MASTER_BUSY
| TC0_MASTER_BUSY
| TC1_MASTER_BUSY
;
5606 for (k
= 0; k
< rdev
->usec_timeout
; k
++) {
5607 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY
) & mask
) == 0)
5613 static void cik_update_rlc(struct radeon_device
*rdev
, u32 rlc
)
5617 tmp
= RREG32(RLC_CNTL
);
5619 WREG32(RLC_CNTL
, rlc
);
5622 static u32
cik_halt_rlc(struct radeon_device
*rdev
)
5626 orig
= data
= RREG32(RLC_CNTL
);
5628 if (data
& RLC_ENABLE
) {
5631 data
&= ~RLC_ENABLE
;
5632 WREG32(RLC_CNTL
, data
);
5634 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
5635 if ((RREG32(RLC_GPM_STAT
) & RLC_GPM_BUSY
) == 0)
5640 cik_wait_for_rlc_serdes(rdev
);
5646 void cik_enter_rlc_safe_mode(struct radeon_device
*rdev
)
5650 tmp
= REQ
| MESSAGE(MSG_ENTER_RLC_SAFE_MODE
);
5651 WREG32(RLC_GPR_REG2
, tmp
);
5653 mask
= GFX_POWER_STATUS
| GFX_CLOCK_STATUS
;
5654 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
5655 if ((RREG32(RLC_GPM_STAT
) & mask
) == mask
)
5660 for (i
= 0; i
< rdev
->usec_timeout
; i
++) {
5661 if ((RREG32(RLC_GPR_REG2
) & REQ
) == 0)
5667 void cik_exit_rlc_safe_mode(struct radeon_device
*rdev
)
5671 tmp
= REQ
| MESSAGE(MSG_EXIT_RLC_SAFE_MODE
);
5672 WREG32(RLC_GPR_REG2
, tmp
);
5676 * cik_rlc_stop - stop the RLC ME
5678 * @rdev: radeon_device pointer
5680 * Halt the RLC ME (MicroEngine) (CIK).
5682 static void cik_rlc_stop(struct radeon_device
*rdev
)
5684 WREG32(RLC_CNTL
, 0);
5686 cik_enable_gui_idle_interrupt(rdev
, false);
5688 cik_wait_for_rlc_serdes(rdev
);
5692 * cik_rlc_start - start the RLC ME
5694 * @rdev: radeon_device pointer
5696 * Unhalt the RLC ME (MicroEngine) (CIK).
5698 static void cik_rlc_start(struct radeon_device
*rdev
)
5700 WREG32(RLC_CNTL
, RLC_ENABLE
);
5702 cik_enable_gui_idle_interrupt(rdev
, true);
5708 * cik_rlc_resume - setup the RLC hw
5710 * @rdev: radeon_device pointer
5712 * Initialize the RLC registers, load the ucode,
5713 * and start the RLC (CIK).
5714 * Returns 0 for success, -EINVAL if the ucode is not available.
5716 static int cik_rlc_resume(struct radeon_device
*rdev
)
5719 const __be32
*fw_data
;
5724 switch (rdev
->family
) {
5727 size
= BONAIRE_RLC_UCODE_SIZE
;
5730 size
= KV_RLC_UCODE_SIZE
;
5733 size
= KB_RLC_UCODE_SIZE
;
5740 tmp
= RREG32(RLC_CGCG_CGLS_CTRL
) & 0xfffffffc;
5741 WREG32(RLC_CGCG_CGLS_CTRL
, tmp
);
5749 WREG32(RLC_LB_CNTR_INIT
, 0);
5750 WREG32(RLC_LB_CNTR_MAX
, 0x00008000);
5752 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5753 WREG32(RLC_LB_INIT_CU_MASK
, 0xffffffff);
5754 WREG32(RLC_LB_PARAMS
, 0x00600408);
5755 WREG32(RLC_LB_CNTL
, 0x80000004);
5757 WREG32(RLC_MC_CNTL
, 0);
5758 WREG32(RLC_UCODE_CNTL
, 0);
5760 fw_data
= (const __be32
*)rdev
->rlc_fw
->data
;
5761 WREG32(RLC_GPM_UCODE_ADDR
, 0);
5762 for (i
= 0; i
< size
; i
++)
5763 WREG32(RLC_GPM_UCODE_DATA
, be32_to_cpup(fw_data
++));
5764 WREG32(RLC_GPM_UCODE_ADDR
, 0);
5766 /* XXX - find out what chips support lbpw */
5767 cik_enable_lbpw(rdev
, false);
5769 if (rdev
->family
== CHIP_BONAIRE
)
5770 WREG32(RLC_DRIVER_DMA_STATUS
, 0);
5772 cik_rlc_start(rdev
);
5777 static void cik_enable_cgcg(struct radeon_device
*rdev
, bool enable
)
5779 u32 data
, orig
, tmp
, tmp2
;
5781 orig
= data
= RREG32(RLC_CGCG_CGLS_CTRL
);
5783 cik_enable_gui_idle_interrupt(rdev
, enable
);
5786 tmp
= cik_halt_rlc(rdev
);
5788 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5789 WREG32(RLC_SERDES_WR_CU_MASTER_MASK
, 0xffffffff);
5790 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK
, 0xffffffff);
5791 tmp2
= BPM_ADDR_MASK
| CGCG_OVERRIDE_0
| CGLS_ENABLE
;
5792 WREG32(RLC_SERDES_WR_CTRL
, tmp2
);
5794 cik_update_rlc(rdev
, tmp
);
5796 data
|= CGCG_EN
| CGLS_EN
;
5798 RREG32(CB_CGTT_SCLK_CTRL
);
5799 RREG32(CB_CGTT_SCLK_CTRL
);
5800 RREG32(CB_CGTT_SCLK_CTRL
);
5801 RREG32(CB_CGTT_SCLK_CTRL
);
5803 data
&= ~(CGCG_EN
| CGLS_EN
);
5807 WREG32(RLC_CGCG_CGLS_CTRL
, data
);
5811 static void cik_enable_mgcg(struct radeon_device
*rdev
, bool enable
)
5813 u32 data
, orig
, tmp
= 0;
5816 orig
= data
= RREG32(CP_MEM_SLP_CNTL
);
5817 data
|= CP_MEM_LS_EN
;
5819 WREG32(CP_MEM_SLP_CNTL
, data
);
5821 orig
= data
= RREG32(RLC_CGTT_MGCG_OVERRIDE
);
5824 WREG32(RLC_CGTT_MGCG_OVERRIDE
, data
);
5826 tmp
= cik_halt_rlc(rdev
);
5828 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5829 WREG32(RLC_SERDES_WR_CU_MASTER_MASK
, 0xffffffff);
5830 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK
, 0xffffffff);
5831 data
= BPM_ADDR_MASK
| MGCG_OVERRIDE_0
;
5832 WREG32(RLC_SERDES_WR_CTRL
, data
);
5834 cik_update_rlc(rdev
, tmp
);
5836 orig
= data
= RREG32(CGTS_SM_CTRL_REG
);
5837 data
&= ~SM_MODE_MASK
;
5838 data
|= SM_MODE(0x2);
5839 data
|= SM_MODE_ENABLE
;
5840 data
&= ~CGTS_OVERRIDE
;
5841 data
&= ~CGTS_LS_OVERRIDE
;
5842 data
&= ~ON_MONITOR_ADD_MASK
;
5843 data
|= ON_MONITOR_ADD_EN
;
5844 data
|= ON_MONITOR_ADD(0x96);
5846 WREG32(CGTS_SM_CTRL_REG
, data
);
5848 orig
= data
= RREG32(RLC_CGTT_MGCG_OVERRIDE
);
5851 WREG32(RLC_CGTT_MGCG_OVERRIDE
, data
);
5853 data
= RREG32(RLC_MEM_SLP_CNTL
);
5854 if (data
& RLC_MEM_LS_EN
) {
5855 data
&= ~RLC_MEM_LS_EN
;
5856 WREG32(RLC_MEM_SLP_CNTL
, data
);
5859 data
= RREG32(CP_MEM_SLP_CNTL
);
5860 if (data
& CP_MEM_LS_EN
) {
5861 data
&= ~CP_MEM_LS_EN
;
5862 WREG32(CP_MEM_SLP_CNTL
, data
);
5865 orig
= data
= RREG32(CGTS_SM_CTRL_REG
);
5866 data
|= CGTS_OVERRIDE
| CGTS_LS_OVERRIDE
;
5868 WREG32(CGTS_SM_CTRL_REG
, data
);
5870 tmp
= cik_halt_rlc(rdev
);
5872 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
5873 WREG32(RLC_SERDES_WR_CU_MASTER_MASK
, 0xffffffff);
5874 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK
, 0xffffffff);
5875 data
= BPM_ADDR_MASK
| MGCG_OVERRIDE_1
;
5876 WREG32(RLC_SERDES_WR_CTRL
, data
);
5878 cik_update_rlc(rdev
, tmp
);
5882 static const u32 mc_cg_registers
[] =
5895 static void cik_enable_mc_ls(struct radeon_device
*rdev
,
5901 for (i
= 0; i
< ARRAY_SIZE(mc_cg_registers
); i
++) {
5902 orig
= data
= RREG32(mc_cg_registers
[i
]);
5904 data
|= MC_LS_ENABLE
;
5906 data
&= ~MC_LS_ENABLE
;
5908 WREG32(mc_cg_registers
[i
], data
);
5912 static void cik_enable_mc_mgcg(struct radeon_device
*rdev
,
5918 for (i
= 0; i
< ARRAY_SIZE(mc_cg_registers
); i
++) {
5919 orig
= data
= RREG32(mc_cg_registers
[i
]);
5921 data
|= MC_CG_ENABLE
;
5923 data
&= ~MC_CG_ENABLE
;
5925 WREG32(mc_cg_registers
[i
], data
);
5929 static void cik_enable_sdma_mgcg(struct radeon_device
*rdev
,
5935 WREG32(SDMA0_CLK_CTRL
+ SDMA0_REGISTER_OFFSET
, 0x00000100);
5936 WREG32(SDMA0_CLK_CTRL
+ SDMA1_REGISTER_OFFSET
, 0x00000100);
5938 orig
= data
= RREG32(SDMA0_CLK_CTRL
+ SDMA0_REGISTER_OFFSET
);
5941 WREG32(SDMA0_CLK_CTRL
+ SDMA0_REGISTER_OFFSET
, data
);
5943 orig
= data
= RREG32(SDMA0_CLK_CTRL
+ SDMA1_REGISTER_OFFSET
);
5946 WREG32(SDMA0_CLK_CTRL
+ SDMA1_REGISTER_OFFSET
, data
);
5950 static void cik_enable_sdma_mgls(struct radeon_device
*rdev
,
5956 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
);
5959 WREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
, data
);
5961 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
);
5964 WREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
, data
);
5966 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
);
5969 WREG32(SDMA0_POWER_CNTL
+ SDMA0_REGISTER_OFFSET
, data
);
5971 orig
= data
= RREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
);
5974 WREG32(SDMA0_POWER_CNTL
+ SDMA1_REGISTER_OFFSET
, data
);
5978 static void cik_enable_uvd_mgcg(struct radeon_device
*rdev
,
5984 data
= RREG32_UVD_CTX(UVD_CGC_MEM_CTRL
);
5986 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL
, data
);
5988 orig
= data
= RREG32(UVD_CGC_CTRL
);
5991 WREG32(UVD_CGC_CTRL
, data
);
5993 data
= RREG32_UVD_CTX(UVD_CGC_MEM_CTRL
);
5995 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL
, data
);
5997 orig
= data
= RREG32(UVD_CGC_CTRL
);
6000 WREG32(UVD_CGC_CTRL
, data
);
6004 static void cik_enable_hdp_mgcg(struct radeon_device
*rdev
,
6009 orig
= data
= RREG32(HDP_HOST_PATH_CNTL
);
6012 data
&= ~CLOCK_GATING_DIS
;
6014 data
|= CLOCK_GATING_DIS
;
6017 WREG32(HDP_HOST_PATH_CNTL
, data
);
6020 static void cik_enable_hdp_ls(struct radeon_device
*rdev
,
6025 orig
= data
= RREG32(HDP_MEM_POWER_LS
);
6028 data
|= HDP_LS_ENABLE
;
6030 data
&= ~HDP_LS_ENABLE
;
6033 WREG32(HDP_MEM_POWER_LS
, data
);
6036 void cik_update_cg(struct radeon_device
*rdev
,
6037 u32 block
, bool enable
)
6039 if (block
& RADEON_CG_BLOCK_GFX
) {
6040 /* order matters! */
6042 cik_enable_mgcg(rdev
, true);
6043 cik_enable_cgcg(rdev
, true);
6045 cik_enable_cgcg(rdev
, false);
6046 cik_enable_mgcg(rdev
, false);
6050 if (block
& RADEON_CG_BLOCK_MC
) {
6051 if (!(rdev
->flags
& RADEON_IS_IGP
)) {
6052 cik_enable_mc_mgcg(rdev
, enable
);
6053 cik_enable_mc_ls(rdev
, enable
);
6057 if (block
& RADEON_CG_BLOCK_SDMA
) {
6058 cik_enable_sdma_mgcg(rdev
, enable
);
6059 cik_enable_sdma_mgls(rdev
, enable
);
6062 if (block
& RADEON_CG_BLOCK_UVD
) {
6064 cik_enable_uvd_mgcg(rdev
, enable
);
6067 if (block
& RADEON_CG_BLOCK_HDP
) {
6068 cik_enable_hdp_mgcg(rdev
, enable
);
6069 cik_enable_hdp_ls(rdev
, enable
);
6073 static void cik_init_cg(struct radeon_device
*rdev
)
6076 cik_update_cg(rdev
, RADEON_CG_BLOCK_GFX
, false); /* XXX true */
6079 si_init_uvd_internal_cg(rdev
);
6081 cik_update_cg(rdev
, (RADEON_CG_BLOCK_MC
|
6082 RADEON_CG_BLOCK_SDMA
|
6083 RADEON_CG_BLOCK_UVD
|
6084 RADEON_CG_BLOCK_HDP
), true);
6087 static void cik_enable_sck_slowdown_on_pu(struct radeon_device
*rdev
,
6092 orig
= data
= RREG32(RLC_PG_CNTL
);
6094 data
|= SMU_CLK_SLOWDOWN_ON_PU_ENABLE
;
6096 data
&= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE
;
6098 WREG32(RLC_PG_CNTL
, data
);
6101 static void cik_enable_sck_slowdown_on_pd(struct radeon_device
*rdev
,
6106 orig
= data
= RREG32(RLC_PG_CNTL
);
6108 data
|= SMU_CLK_SLOWDOWN_ON_PD_ENABLE
;
6110 data
&= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE
;
6112 WREG32(RLC_PG_CNTL
, data
);
6115 static void cik_enable_cp_pg(struct radeon_device
*rdev
, bool enable
)
6119 orig
= data
= RREG32(RLC_PG_CNTL
);
6121 data
&= ~DISABLE_CP_PG
;
6123 data
|= DISABLE_CP_PG
;
6125 WREG32(RLC_PG_CNTL
, data
);
6128 static void cik_enable_gds_pg(struct radeon_device
*rdev
, bool enable
)
6132 orig
= data
= RREG32(RLC_PG_CNTL
);
6134 data
&= ~DISABLE_GDS_PG
;
6136 data
|= DISABLE_GDS_PG
;
6138 WREG32(RLC_PG_CNTL
, data
);
6141 #define CP_ME_TABLE_SIZE 96
6142 #define CP_ME_TABLE_OFFSET 2048
6143 #define CP_MEC_TABLE_OFFSET 4096
6145 void cik_init_cp_pg_table(struct radeon_device
*rdev
)
6147 const __be32
*fw_data
;
6148 volatile u32
*dst_ptr
;
6149 int me
, i
, max_me
= 4;
6153 if (rdev
->family
== CHIP_KAVERI
)
6156 if (rdev
->rlc
.cp_table_ptr
== NULL
)
6159 /* write the cp table buffer */
6160 dst_ptr
= rdev
->rlc
.cp_table_ptr
;
6161 for (me
= 0; me
< max_me
; me
++) {
6163 fw_data
= (const __be32
*)rdev
->ce_fw
->data
;
6164 table_offset
= CP_ME_TABLE_OFFSET
;
6165 } else if (me
== 1) {
6166 fw_data
= (const __be32
*)rdev
->pfp_fw
->data
;
6167 table_offset
= CP_ME_TABLE_OFFSET
;
6168 } else if (me
== 2) {
6169 fw_data
= (const __be32
*)rdev
->me_fw
->data
;
6170 table_offset
= CP_ME_TABLE_OFFSET
;
6172 fw_data
= (const __be32
*)rdev
->mec_fw
->data
;
6173 table_offset
= CP_MEC_TABLE_OFFSET
;
6176 for (i
= 0; i
< CP_ME_TABLE_SIZE
; i
++) {
6177 dst_ptr
[bo_offset
+ i
] = be32_to_cpu(fw_data
[table_offset
+ i
]);
6179 bo_offset
+= CP_ME_TABLE_SIZE
;
6183 static void cik_enable_gfx_cgpg(struct radeon_device
*rdev
,
6189 orig
= data
= RREG32(RLC_PG_CNTL
);
6190 data
|= GFX_PG_ENABLE
;
6192 WREG32(RLC_PG_CNTL
, data
);
6194 orig
= data
= RREG32(RLC_AUTO_PG_CTRL
);
6197 WREG32(RLC_AUTO_PG_CTRL
, data
);
6199 orig
= data
= RREG32(RLC_PG_CNTL
);
6200 data
&= ~GFX_PG_ENABLE
;
6202 WREG32(RLC_PG_CNTL
, data
);
6204 orig
= data
= RREG32(RLC_AUTO_PG_CTRL
);
6205 data
&= ~AUTO_PG_EN
;
6207 WREG32(RLC_AUTO_PG_CTRL
, data
);
6209 data
= RREG32(DB_RENDER_CONTROL
);
6213 static u32
cik_get_cu_active_bitmap(struct radeon_device
*rdev
, u32 se
, u32 sh
)
6215 u32 mask
= 0, tmp
, tmp1
;
6218 cik_select_se_sh(rdev
, se
, sh
);
6219 tmp
= RREG32(CC_GC_SHADER_ARRAY_CONFIG
);
6220 tmp1
= RREG32(GC_USER_SHADER_ARRAY_CONFIG
);
6221 cik_select_se_sh(rdev
, 0xffffffff, 0xffffffff);
6228 for (i
= 0; i
< rdev
->config
.cik
.max_cu_per_sh
; i
++) {
6233 return (~tmp
) & mask
;
6236 static void cik_init_ao_cu_mask(struct radeon_device
*rdev
)
6238 u32 i
, j
, k
, active_cu_number
= 0;
6239 u32 mask
, counter
, cu_bitmap
;
6242 for (i
= 0; i
< rdev
->config
.cik
.max_shader_engines
; i
++) {
6243 for (j
= 0; j
< rdev
->config
.cik
.max_sh_per_se
; j
++) {
6247 for (k
= 0; k
< rdev
->config
.cik
.max_cu_per_sh
; k
++) {
6248 if (cik_get_cu_active_bitmap(rdev
, i
, j
) & mask
) {
6256 active_cu_number
+= counter
;
6257 tmp
|= (cu_bitmap
<< (i
* 16 + j
* 8));
6261 WREG32(RLC_PG_AO_CU_MASK
, tmp
);
6263 tmp
= RREG32(RLC_MAX_PG_CU
);
6264 tmp
&= ~MAX_PU_CU_MASK
;
6265 tmp
|= MAX_PU_CU(active_cu_number
);
6266 WREG32(RLC_MAX_PG_CU
, tmp
);
6269 static void cik_enable_gfx_static_mgpg(struct radeon_device
*rdev
,
6274 orig
= data
= RREG32(RLC_PG_CNTL
);
6276 data
|= STATIC_PER_CU_PG_ENABLE
;
6278 data
&= ~STATIC_PER_CU_PG_ENABLE
;
6280 WREG32(RLC_PG_CNTL
, data
);
6283 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device
*rdev
,
6288 orig
= data
= RREG32(RLC_PG_CNTL
);
6290 data
|= DYN_PER_CU_PG_ENABLE
;
6292 data
&= ~DYN_PER_CU_PG_ENABLE
;
6294 WREG32(RLC_PG_CNTL
, data
);
6297 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6298 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6300 static void cik_init_gfx_cgpg(struct radeon_device
*rdev
)
6305 if (rdev
->rlc
.cs_data
) {
6306 WREG32(RLC_GPM_SCRATCH_ADDR
, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET
);
6307 WREG32(RLC_GPM_SCRATCH_DATA
, upper_32_bits(rdev
->rlc
.clear_state_gpu_addr
));
6308 WREG32(RLC_GPM_SCRATCH_DATA
, rdev
->rlc
.clear_state_gpu_addr
);
6309 WREG32(RLC_GPM_SCRATCH_DATA
, rdev
->rlc
.clear_state_size
);
6311 WREG32(RLC_GPM_SCRATCH_ADDR
, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET
);
6312 for (i
= 0; i
< 3; i
++)
6313 WREG32(RLC_GPM_SCRATCH_DATA
, 0);
6315 if (rdev
->rlc
.reg_list
) {
6316 WREG32(RLC_GPM_SCRATCH_ADDR
, RLC_SAVE_AND_RESTORE_STARTING_OFFSET
);
6317 for (i
= 0; i
< rdev
->rlc
.reg_list_size
; i
++)
6318 WREG32(RLC_GPM_SCRATCH_DATA
, rdev
->rlc
.reg_list
[i
]);
6321 orig
= data
= RREG32(RLC_PG_CNTL
);
6324 WREG32(RLC_PG_CNTL
, data
);
6326 WREG32(RLC_SAVE_AND_RESTORE_BASE
, rdev
->rlc
.save_restore_gpu_addr
>> 8);
6327 WREG32(RLC_CP_TABLE_RESTORE
, rdev
->rlc
.cp_table_gpu_addr
>> 8);
6329 data
= RREG32(CP_RB_WPTR_POLL_CNTL
);
6330 data
&= ~IDLE_POLL_COUNT_MASK
;
6331 data
|= IDLE_POLL_COUNT(0x60);
6332 WREG32(CP_RB_WPTR_POLL_CNTL
, data
);
6335 WREG32(RLC_PG_DELAY
, data
);
6337 data
= RREG32(RLC_PG_DELAY_2
);
6340 WREG32(RLC_PG_DELAY_2
, data
);
6342 data
= RREG32(RLC_AUTO_PG_CTRL
);
6343 data
&= ~GRBM_REG_SGIT_MASK
;
6344 data
|= GRBM_REG_SGIT(0x700);
6345 WREG32(RLC_AUTO_PG_CTRL
, data
);
6349 static void cik_update_gfx_pg(struct radeon_device
*rdev
, bool enable
)
6351 bool has_pg
= false;
6352 bool has_dyn_mgpg
= false;
6353 bool has_static_mgpg
= false;
6355 /* only APUs have PG */
6356 if (rdev
->flags
& RADEON_IS_IGP
) {
6358 has_static_mgpg
= true;
6359 if (rdev
->family
== CHIP_KAVERI
)
6360 has_dyn_mgpg
= true;
6364 cik_enable_gfx_cgpg(rdev
, enable
);
6366 cik_enable_gfx_static_mgpg(rdev
, has_static_mgpg
);
6367 cik_enable_gfx_dynamic_mgpg(rdev
, has_dyn_mgpg
);
6369 cik_enable_gfx_static_mgpg(rdev
, false);
6370 cik_enable_gfx_dynamic_mgpg(rdev
, false);
6376 void cik_init_pg(struct radeon_device
*rdev
)
6378 bool has_pg
= false;
6380 /* only APUs have PG */
6381 if (rdev
->flags
& RADEON_IS_IGP
) {
6382 /* XXX disable this for now */
6383 /* has_pg = true; */
6387 cik_enable_sck_slowdown_on_pu(rdev
, true);
6388 cik_enable_sck_slowdown_on_pd(rdev
, true);
6389 cik_init_gfx_cgpg(rdev
);
6390 cik_enable_cp_pg(rdev
, true);
6391 cik_enable_gds_pg(rdev
, true);
6392 cik_init_ao_cu_mask(rdev
);
6393 cik_update_gfx_pg(rdev
, true);
6399 * Starting with r6xx, interrupts are handled via a ring buffer.
6400 * Ring buffers are areas of GPU accessible memory that the GPU
6401 * writes interrupt vectors into and the host reads vectors out of.
6402 * There is a rptr (read pointer) that determines where the
6403 * host is currently reading, and a wptr (write pointer)
6404 * which determines where the GPU has written. When the
6405 * pointers are equal, the ring is idle. When the GPU
6406 * writes vectors to the ring buffer, it increments the
6407 * wptr. When there is an interrupt, the host then starts
6408 * fetching commands and processing them until the pointers are
6409 * equal again at which point it updates the rptr.
6413 * cik_enable_interrupts - Enable the interrupt ring buffer
6415 * @rdev: radeon_device pointer
6417 * Enable the interrupt ring buffer (CIK).
6419 static void cik_enable_interrupts(struct radeon_device
*rdev
)
6421 u32 ih_cntl
= RREG32(IH_CNTL
);
6422 u32 ih_rb_cntl
= RREG32(IH_RB_CNTL
);
6424 ih_cntl
|= ENABLE_INTR
;
6425 ih_rb_cntl
|= IH_RB_ENABLE
;
6426 WREG32(IH_CNTL
, ih_cntl
);
6427 WREG32(IH_RB_CNTL
, ih_rb_cntl
);
6428 rdev
->ih
.enabled
= true;
/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * cp, etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;
	u32 thermal_int;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	if (rdev->flags & RADEON_IS_IGP)
		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
			~(THERM_INTH_MASK | THERM_INTL_MASK);
	else
		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else if (ring->me == 2) {
			switch (ring->pipe) {
			case 0:
				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
				break;
			case 1:
				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
				break;
			case 2:
				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
				break;
			case 3:
				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	if (rdev->irq.dpm_thermal) {
		DRM_DEBUG("dpm thermal\n");
		if (rdev->flags & RADEON_IS_IGP)
			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
		else
			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
	}
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	if (rdev->flags & RADEON_IS_IGP)
		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
	else
		WREG32_SMC(CG_THERMAL_INT, thermal_int);

	return 0;
}
/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}
	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not-overwritten vector (wptr + 16).  Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
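/* Note: IV ring entries are 16 bytes, so the "wptr + 16" above skips exactly
 * one entry - the oldest one, which may already have been overwritten by the
 * GPU when the ring wrapped.
 */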
/*
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
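/*
 * As a quick reference, cik_irq_process() below unpacks an entry like this
 * (rptr and wptr are byte offsets, the ring is an array of __le32):
 *
 *	ring_index = rptr / 4;
 *	src_id   = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
 *	src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
 *	ring_id  = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
 */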
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_reset = false;
	u32 addr, status, mc_client;
	bool queue_thermal = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);
	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			cik_vm_decode_fault(rdev, status, addr, mc_client);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
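			/* ring_id layout as decoded above: bits [6:5] = ME,
			 * bits [4:3] = pipe, bits [2:0] = queue, matching the
			 * RINGID description at the top of this section.
			 */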
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			/* This results in a full GPU reset, but all we need to do is soft
			 * reset the CP for gfx
			 */
			queue_reset = true;
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			/* This results in a full GPU reset, but all we need to do is soft
			 * reset the CP for gfx
			 */
			queue_reset = true;
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			DRM_DEBUG("IH: SDMA trap\n");
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
				break;
			}
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 241: /* SDMA Privileged inst */
		case 247: /* SDMA Privileged inst */
			DRM_ERROR("Illegal instruction in SDMA command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			queue_reset = true;
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}
		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_reset)
		schedule_work(&rdev->reset_work);
	if (queue_thermal)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = cik_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
/*
 * startup/shutdown callbacks
 */
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	cik_mc_program(rdev);

	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = cik_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	cik_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	r600_uvd_stop(rdev);
	radeon_uvd_suspend(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;
}
/* Plan is to move initialization in that function and use
 * helper functions so that radeon_device_init pretty much
 * does nothing more than calling asic specific functions. This
 * should also allow us to remove a bunch of callback functions.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* init golden registers */
	cik_init_golden_registers(rdev);
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_sdma_fini(rdev);
		sumo_rlc_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_sdma_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	r600_uvd_stop(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
}
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp;

	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920)
			tmp = 1;
		else if (mode->crtc_hdisplay < 2560)
			tmp = 2;
		else if (mode->crtc_hdisplay < 4096)
			tmp = 0;
		else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
		}
	} else
		tmp = 1;

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
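/* In plain terms the function above computes
 *   dram bandwidth ~= (yclk / 1000) * (dram_channels * 4) * 0.7
 * i.e. the per-pin clock in MHz times the byte width of the dram interface,
 * derated by a 70% efficiency factor, giving MBytes/s.
 */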
/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
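/* i.e. average bandwidth = src_width * bytes_per_pixel * vsc / line_time,
 * where line_time = (active_time + blank_time) / 1000 converts the ns values
 * from the wm struct into us.
 */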
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
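/* Net effect of the math above: the watermark is the base latency
 * (mc_latency + dc_latency + the time for the other heads' chunk and cursor
 * returns), extended by however much longer it takes to fill a destination
 * line than the active display time allows.
 */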
/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}
/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}
/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
8382 * dce8_program_watermarks - program display watermarks
8384 * @rdev: radeon_device pointer
8385 * @radeon_crtc: the selected display controller
8386 * @lb_size: line buffer size
8387 * @num_heads: number of display controllers in use
8389 * Calculate and program the display watermarks for the
8390 * selected display controller (CIK).
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
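/*
 * Illustrative example (values are hypothetical, not taken from the driver):
 * with a 148500 kHz pixel clock and crtc_htotal of 2200, the calculation at
 * the top of dce8_program_watermarks() yields roughly
 *   pixel_period = 1000000 / 148500 = 6   (pixel period in ns, truncated)
 *   line_time    = 2200 * 6 = 13200       (ns, clamped to 65535)
 * Watermark A is derived from the high (DPM maximum) clocks and watermark B
 * from the low clocks; both are capped at 65535 and the results are saved in
 * radeon_crtc->wm_high/wm_low for later use by the DPM code, per the
 * "save values for DPM" comment above.
 */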
/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}
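/*
 * Note on dce8_bandwidth_update(): num_heads is the number of currently
 * enabled CRTCs, counted in the first loop.  Each CRTC is then given its
 * adjusted line buffer allocation and its watermarks are programmed with that
 * shared head count, so the watermark helpers can account for memory
 * bandwidth being shared between all active displays.
 */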
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
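/*
 * The write of 1 to RLC_CAPTURE_GPU_CLOCK_COUNT asks the RLC to latch the
 * free-running GPU clock counter so that the following LSB/MSB reads form a
 * single consistent 64 bit sample; gpu_clock_mutex keeps concurrent callers
 * from interleaving their capture/read sequences.
 */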
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}
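/*
 * VCLK and DCLK share the same programming path: cik_set_uvd_clock() asks the
 * AtomBIOS clock-divider table for a post divider matching the requested
 * clock, writes it into the given CG_*CLK_CNTL register and then polls the
 * matching status register until DCLK_STATUS indicates the new divider has
 * taken effect.  cik_set_uvd_clocks() applies this to the VCLK registers
 * first and the DCLK registers second, returning on the first failure.
 */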
int cik_uvd_resume(struct radeon_device *rdev)
{
	uint64_t addr;
	uint32_t size;
	int r;

	r = radeon_uvd_resume(rdev);
	if (r)
		return r;

	/* program the VCPU memory controller bits 0-27 */
	addr = rdev->uvd.gpu_addr >> 3;
	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
	WREG32(UVD_VCPU_CACHE_SIZE0, size);

	addr += size;
	size = RADEON_UVD_STACK_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
	WREG32(UVD_VCPU_CACHE_SIZE1, size);

	addr += size;
	size = RADEON_UVD_HEAP_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
	WREG32(UVD_VCPU_CACHE_SIZE2, size);

	/* bits 28-31 */
	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));

	/* bits 32-39 */
	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));

	return 0;
}
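/*
 * Address layout programmed above: the 40 bit UVD GPU address is split into
 *   bits  0-27: VCPU cache offsets/sizes, written in 8-byte units (>> 3)
 *               for the firmware image, stack and heap regions in turn,
 *   bits 28-31: replicated into both nibbles of UVD_LMI_ADDR_EXT,
 *   bits 32-39: written to UVD_LMI_EXT40_ADDR together with the fixed
 *               (0x9 << 16) and (0x1 << 31) control bits.
 */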
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
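/*
 * Link speed encoding used above when writing PCI_EXP_LNKCTL2: a target link
 * speed value of 1, 2 or 3 selects 2.5, 5.0 or 8.0 GT/s (gen1/gen2/gen3).
 * The gen3 path additionally re-runs equalization when the link is not
 * already at gen3: the bridge and GPU link-control settings are saved, the
 * link is quiesced via LC_SET_QUIESCE/LC_REDO_EQ, and the saved settings are
 * restored before the quiesce bit is cleared again.
 */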
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);