drm/radeon: add dpm support for CI dGPUs (v2)
deliverable/linux.git: drivers/gpu/drm/radeon/cik.c
/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
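
/*
 * Illustrative sketch (not part of the driver): both helpers above return
 * millidegrees C, so a hwmon-style consumer could report the value
 * directly.  The caller shown here is hypothetical:
 *
 *	int mdeg = (rdev->flags & RADEON_IS_IGP) ?
 *		kv_get_temp(rdev) : ci_get_temp(rdev);
 *	DRM_INFO("GPU temperature: %d C\n", mdeg / 1000);
 */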

/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
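
/*
 * Illustrative sketch (not part of the driver): a read-modify-write of a
 * PCIE port register goes through the accessor pair above (normally via
 * the RREG32_PCIE_PORT()/WREG32_PCIE_PORT() wrappers); "reg" and "bit"
 * below are placeholders:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, reg);
 *	tmp |= bit;
 *	cik_pciep_wreg(rdev, reg, tmp);
 *
 * The dummy reads of PCIE_INDEX/PCIE_DATA flush each access before the
 * next one is issued.
 */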
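/*
 * RLC save/restore lists.  Each entry appears to be a control dword --
 * an instance select in the upper 16 bits and a dword register offset
 * (byte offset >> 2) in the lower 16 bits -- followed by the value to
 * restore (0 here, i.e. save/restore the live contents).  The bare
 * counts (0x3, 0x5) introduce the list's trailing sub-sections.  This
 * reading of the encoding is inferred from the values, not taken from
 * documentation.
 */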
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
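
/*
 * "Golden" register tables: {offset, and_mask, or_value} triplets
 * consumed by radeon_program_register_sequence(), which read-modify-writes
 * each register (or writes or_value directly when and_mask is 0xffffffff).
 */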
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
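
/*
 * Illustrative sketch (not part of the driver): like the other radeon
 * clock values, the returned xclk is believed to be in 10 kHz units, so
 * a hypothetical caller converting to kHz would do:
 *
 *	u32 xclk = cik_get_xclk(rdev);
 *	u32 khz = xclk * 10;
 */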

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
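
/*
 * Illustrative sketch (not part of the driver): compute rings typically
 * kick off new work by writing their write pointer through the doorbell
 * aperture; the field names below are placeholders:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 */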
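/*
 * MC sequencer init: {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs
 * written out before the MC ucode is loaded (see ci_mc_load_microcode()
 * below).
 */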
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
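
/*
 * Illustrative sketch (not part of the driver): per-VMID registers are
 * programmed by selecting the instance first and restoring the default
 * selection afterwards, typically under a lock since the select is
 * global state:
 *
 *	cik_srbm_select(rdev, 0, 0, 0, vmid);
 *	WREG32(reg, value);	/\* hypothetical per-VMID register *\/
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */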

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* the MC engine is halted, so it can be reprogrammed
		 * without blacking out the MC first
		 */

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			/* smc ucode is optional; dpm is simply not
			 * enabled without it
			 */
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->mec_fw);
		rdev->mec_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->sdma_fw);
		rdev->sdma_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
1970 case 0:
1971 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1972 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1973 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1974 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1975 break;
1976 case 1:
1977 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1979 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1981 break;
1982 case 2:
1983 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1985 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1986 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1987 break;
1988 case 3:
1989 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1991 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1993 break;
1994 case 4:
1995 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1997 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1998 TILE_SPLIT(split_equal_to_row_size));
1999 break;
2000 case 5:
2001 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2003 break;
2004 case 6:
2005 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2007 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2009 break;
2010 case 7:
2011 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2013 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2014 TILE_SPLIT(split_equal_to_row_size));
2015 break;
2016 case 8:
2017 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2018 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2019 break;
2020 case 9:
2021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2023 break;
2024 case 10:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2029 break;
2030 case 11:
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2033 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2035 break;
2036 case 12:
2037 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2038 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2039 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2041 break;
2042 case 13:
2043 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2045 break;
2046 case 14:
2047 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2049 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051 break;
2052 case 16:
2053 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2055 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2057 break;
2058 case 17:
2059 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2061 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063 break;
2064 case 27:
2065 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2067 break;
2068 case 28:
2069 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2071 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 break;
2074 case 29:
2075 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2077 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 break;
2080 case 30:
2081 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2083 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2085 break;
2086 default:
2087 gb_tile_moden = 0;
2088 break;
2089 }
2090 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2091 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2092 }
2093 } else if (num_rbs < 4) {
2094 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2095 switch (reg_offset) {
2096 case 0:
2097 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2098 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2099 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2100 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2101 break;
2102 case 1:
2103 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2105 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2107 break;
2108 case 2:
2109 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2111 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2113 break;
2114 case 3:
2115 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2117 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2119 break;
2120 case 4:
2121 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2123 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2124 TILE_SPLIT(split_equal_to_row_size));
2125 break;
2126 case 5:
2127 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2128 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2129 break;
2130 case 6:
2131 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2132 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2133 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2134 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2135 break;
2136 case 7:
2137 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2139 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2140 TILE_SPLIT(split_equal_to_row_size));
2141 break;
2142 case 8:
2143 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2144 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2145 break;
2146 case 9:
2147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2149 break;
2150 case 10:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155 break;
2156 case 11:
2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 break;
2162 case 12:
2163 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2164 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2165 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 break;
2168 case 13:
2169 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2171 break;
2172 case 14:
2173 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 break;
2178 case 16:
2179 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 break;
2184 case 17:
2185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2186 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 break;
2190 case 27:
2191 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2193 break;
2194 case 28:
2195 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2196 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2197 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 break;
2200 case 29:
2201 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2203 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205 break;
2206 case 30:
2207 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2208 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2209 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211 break;
2212 default:
2213 gb_tile_moden = 0;
2214 break;
2215 }
2216 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2217 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2218 }
2219 }
2220 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2221 switch (reg_offset) {
2222 case 0:
2223 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2226 NUM_BANKS(ADDR_SURF_16_BANK));
2227 break;
2228 case 1:
2229 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232 NUM_BANKS(ADDR_SURF_16_BANK));
2233 break;
2234 case 2:
2235 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2238 NUM_BANKS(ADDR_SURF_16_BANK));
2239 break;
2240 case 3:
2241 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244 NUM_BANKS(ADDR_SURF_16_BANK));
2245 break;
2246 case 4:
2247 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250 NUM_BANKS(ADDR_SURF_16_BANK));
2251 break;
2252 case 5:
2253 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256 NUM_BANKS(ADDR_SURF_8_BANK));
2257 break;
2258 case 6:
2259 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2262 NUM_BANKS(ADDR_SURF_4_BANK));
2263 break;
2264 case 8:
2265 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268 NUM_BANKS(ADDR_SURF_16_BANK));
2269 break;
2270 case 9:
2271 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274 NUM_BANKS(ADDR_SURF_16_BANK));
2275 break;
2276 case 10:
2277 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280 NUM_BANKS(ADDR_SURF_16_BANK));
2281 break;
2282 case 11:
2283 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 NUM_BANKS(ADDR_SURF_16_BANK));
2287 break;
2288 case 12:
2289 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292 NUM_BANKS(ADDR_SURF_16_BANK));
2293 break;
2294 case 13:
2295 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2298 NUM_BANKS(ADDR_SURF_8_BANK));
2299 break;
2300 case 14:
2301 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2304 NUM_BANKS(ADDR_SURF_4_BANK));
2305 break;
2306 default:
2307 gb_tile_moden = 0;
2308 break;
2309 }
2310 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2311 }
2312 } else if (num_pipe_configs == 2) {
2313 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2314 switch (reg_offset) {
2315 case 0:
2316 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 PIPE_CONFIG(ADDR_SURF_P2) |
2319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2320 break;
2321 case 1:
2322 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 PIPE_CONFIG(ADDR_SURF_P2) |
2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2326 break;
2327 case 2:
2328 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 PIPE_CONFIG(ADDR_SURF_P2) |
2331 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2332 break;
2333 case 3:
2334 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 PIPE_CONFIG(ADDR_SURF_P2) |
2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2338 break;
2339 case 4:
2340 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 PIPE_CONFIG(ADDR_SURF_P2) |
2343 TILE_SPLIT(split_equal_to_row_size));
2344 break;
2345 case 5:
2346 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348 break;
2349 case 6:
2350 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352 PIPE_CONFIG(ADDR_SURF_P2) |
2353 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354 break;
2355 case 7:
2356 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358 PIPE_CONFIG(ADDR_SURF_P2) |
2359 TILE_SPLIT(split_equal_to_row_size));
2360 break;
2361 case 8:
2362 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2363 break;
2364 case 9:
2365 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2367 break;
2368 case 10:
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 PIPE_CONFIG(ADDR_SURF_P2) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 break;
2374 case 11:
2375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2376 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2377 PIPE_CONFIG(ADDR_SURF_P2) |
2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379 break;
2380 case 12:
2381 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2383 PIPE_CONFIG(ADDR_SURF_P2) |
2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 break;
2386 case 13:
2387 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2388 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2389 break;
2390 case 14:
2391 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 PIPE_CONFIG(ADDR_SURF_P2) |
2394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 break;
2396 case 16:
2397 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2399 PIPE_CONFIG(ADDR_SURF_P2) |
2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401 break;
2402 case 17:
2403 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2405 PIPE_CONFIG(ADDR_SURF_P2) |
2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407 break;
2408 case 27:
2409 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2411 break;
2412 case 28:
2413 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415 PIPE_CONFIG(ADDR_SURF_P2) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 break;
2418 case 29:
2419 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2421 PIPE_CONFIG(ADDR_SURF_P2) |
2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423 break;
2424 case 30:
2425 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427 PIPE_CONFIG(ADDR_SURF_P2) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 break;
2430 default:
2431 gb_tile_moden = 0;
2432 break;
2433 }
2434 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2435 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2436 }
2437 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2438 switch (reg_offset) {
2439 case 0:
2440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2443 NUM_BANKS(ADDR_SURF_16_BANK));
2444 break;
2445 case 1:
2446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2449 NUM_BANKS(ADDR_SURF_16_BANK));
2450 break;
2451 case 2:
2452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2455 NUM_BANKS(ADDR_SURF_16_BANK));
2456 break;
2457 case 3:
2458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2461 NUM_BANKS(ADDR_SURF_16_BANK));
2462 break;
2463 case 4:
2464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2467 NUM_BANKS(ADDR_SURF_16_BANK));
2468 break;
2469 case 5:
2470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2473 NUM_BANKS(ADDR_SURF_16_BANK));
2474 break;
2475 case 6:
2476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 NUM_BANKS(ADDR_SURF_8_BANK));
2480 break;
2481 case 8:
2482 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2485 NUM_BANKS(ADDR_SURF_16_BANK));
2486 break;
2487 case 9:
2488 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2491 NUM_BANKS(ADDR_SURF_16_BANK));
2492 break;
2493 case 10:
2494 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2497 NUM_BANKS(ADDR_SURF_16_BANK));
2498 break;
2499 case 11:
2500 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2503 NUM_BANKS(ADDR_SURF_16_BANK));
2504 break;
2505 case 12:
2506 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509 NUM_BANKS(ADDR_SURF_16_BANK));
2510 break;
2511 case 13:
2512 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2515 NUM_BANKS(ADDR_SURF_16_BANK));
2516 break;
2517 case 14:
2518 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521 NUM_BANKS(ADDR_SURF_8_BANK));
2522 break;
2523 default:
2524 gb_tile_moden = 0;
2525 break;
2526 }
2527 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2528 }
2529 } else
2530 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2531 }
2532
2533 /**
2534 * cik_select_se_sh - select which SE, SH to address
2535 *
2536 * @rdev: radeon_device pointer
2537 * @se_num: shader engine to address
2538 * @sh_num: sh block to address
2539 *
2540 * Select which SE, SH combinations to address. Certain
2541 * registers are instanced per SE or SH. 0xffffffff means
2542 * broadcast to all SEs or SHs (CIK).
2543 */
2544 static void cik_select_se_sh(struct radeon_device *rdev,
2545 u32 se_num, u32 sh_num)
2546 {
2547 u32 data = INSTANCE_BROADCAST_WRITES;
2548
2549 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2550 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2551 else if (se_num == 0xffffffff)
2552 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2553 else if (sh_num == 0xffffffff)
2554 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2555 else
2556 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2557 WREG32(GRBM_GFX_INDEX, data);
2558 }
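
/*
 * Illustrative usage sketch (not part of the original flow): callers
 * that read an instanced register for a single SE/SH pair are expected
 * to restore broadcast mode afterwards, e.g.:
 *
 *   cik_select_se_sh(rdev, 1, 0);
 *   data = RREG32(CC_RB_BACKEND_DISABLE);
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *
 * Skipping the final broadcast restore would leave later instanced
 * accesses targeting only SE 1 / SH 0.
 */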
2559
2560 /**
2561 * cik_create_bitmask - create a bitmask
2562 *
2563 * @bit_width: length of the mask
2564 *
2565 * create a variable length bit mask (CIK).
2566 * Returns the bitmask.
2567 */
2568 static u32 cik_create_bitmask(u32 bit_width)
2569 {
2570 u32 i, mask = 0;
2571
2572 for (i = 0; i < bit_width; i++) {
2573 mask <<= 1;
2574 mask |= 1;
2575 }
2576 return mask;
2577 }
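
/*
 * Worked example: cik_create_bitmask(4) builds 0x1 -> 0x3 -> 0x7 ->
 * 0xf over four iterations; a bit_width of 0 returns 0.
 */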
2578
2579 /**
2580 * cik_get_rb_disabled - get the disabled render backends
2581 *
2582 * @rdev: radeon_device pointer
2583 * @max_rb_num: max RBs (render backends) for the asic
2584 * @se_num: number of SEs (shader engines) for the asic
2585 * @sh_per_se: number of SH blocks per SE for the asic
2586 *
2587 * Calculates the bitmask of disabled RBs (CIK).
2588 * Returns the disabled RB bitmask.
2589 */
2590 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2591 u32 max_rb_num, u32 se_num,
2592 u32 sh_per_se)
2593 {
2594 u32 data, mask;
2595
2596 data = RREG32(CC_RB_BACKEND_DISABLE);
2597 if (data & 1)
2598 data &= BACKEND_DISABLE_MASK;
2599 else
2600 data = 0;
2601 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2602
2603 data >>= BACKEND_DISABLE_SHIFT;
2604
2605 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2606
2607 return data & mask;
2608 }
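
/*
 * Arithmetic sketch for the mask above: with max_rb_num = 4,
 * se_num = 2 and sh_per_se = 1, cik_create_bitmask(4 / 2 / 1) == 0x3,
 * so only the two disable bits belonging to the currently selected
 * SE/SH survive the final "data & mask".
 */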
2609
2610 /**
2611 * cik_setup_rb - setup the RBs on the asic
2612 *
2613 * @rdev: radeon_device pointer
2614 * @se_num: number of SEs (shader engines) for the asic
2615 * @sh_per_se: number of SH blocks per SE for the asic
2616 * @max_rb_num: max RBs (render backends) for the asic
2617 *
2618 * Configures per-SE/SH RB registers (CIK).
2619 */
2620 static void cik_setup_rb(struct radeon_device *rdev,
2621 u32 se_num, u32 sh_per_se,
2622 u32 max_rb_num)
2623 {
2624 int i, j;
2625 u32 data, mask;
2626 u32 disabled_rbs = 0;
2627 u32 enabled_rbs = 0;
2628
2629 for (i = 0; i < se_num; i++) {
2630 for (j = 0; j < sh_per_se; j++) {
2631 cik_select_se_sh(rdev, i, j);
2632 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2633 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2634 }
2635 }
2636 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2637
2638 mask = 1;
2639 for (i = 0; i < max_rb_num; i++) {
2640 if (!(disabled_rbs & mask))
2641 enabled_rbs |= mask;
2642 mask <<= 1;
2643 }
2644
2645 for (i = 0; i < se_num; i++) {
2646 cik_select_se_sh(rdev, i, 0xffffffff);
2647 data = 0;
2648 for (j = 0; j < sh_per_se; j++) {
2649 switch (enabled_rbs & 3) {
2650 case 1:
2651 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2652 break;
2653 case 2:
2654 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2655 break;
2656 case 3:
2657 default:
2658 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2659 break;
2660 }
2661 enabled_rbs >>= 2;
2662 }
2663 WREG32(PA_SC_RASTER_CONFIG, data);
2664 }
2665 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2666 }
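
/*
 * Sketch of the enabled_rbs walk above (numbers illustrative): each
 * SH consumes two bits, so a group value of 3 means both RBs survived
 * harvesting and RASTER_CONFIG_RB_MAP_2 is chosen, while 1 or 2
 * selects a map that presumably routes work to the single surviving
 * RB.
 */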
2667
2668 /**
2669 * cik_gpu_init - setup the 3D engine
2670 *
2671 * @rdev: radeon_device pointer
2672 *
2673 * Configures the 3D engine and tiling configuration
2674 * registers so that the 3D engine is usable.
2675 */
2676 static void cik_gpu_init(struct radeon_device *rdev)
2677 {
2678 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2679 u32 mc_shared_chmap, mc_arb_ramcfg;
2680 u32 hdp_host_path_cntl;
2681 u32 tmp;
2682 int i, j;
2683
2684 switch (rdev->family) {
2685 case CHIP_BONAIRE:
2686 rdev->config.cik.max_shader_engines = 2;
2687 rdev->config.cik.max_tile_pipes = 4;
2688 rdev->config.cik.max_cu_per_sh = 7;
2689 rdev->config.cik.max_sh_per_se = 1;
2690 rdev->config.cik.max_backends_per_se = 2;
2691 rdev->config.cik.max_texture_channel_caches = 4;
2692 rdev->config.cik.max_gprs = 256;
2693 rdev->config.cik.max_gs_threads = 32;
2694 rdev->config.cik.max_hw_contexts = 8;
2695
2696 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2697 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2698 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2699 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2700 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2701 break;
2702 case CHIP_KAVERI:
2703 /* TODO */
2704 break;
2705 case CHIP_KABINI:
2706 default:
2707 rdev->config.cik.max_shader_engines = 1;
2708 rdev->config.cik.max_tile_pipes = 2;
2709 rdev->config.cik.max_cu_per_sh = 2;
2710 rdev->config.cik.max_sh_per_se = 1;
2711 rdev->config.cik.max_backends_per_se = 1;
2712 rdev->config.cik.max_texture_channel_caches = 2;
2713 rdev->config.cik.max_gprs = 256;
2714 rdev->config.cik.max_gs_threads = 16;
2715 rdev->config.cik.max_hw_contexts = 8;
2716
2717 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2718 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2719 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2720 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2721 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2722 break;
2723 }
2724
2725 /* Initialize HDP */
2726 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2727 WREG32((0x2c14 + j), 0x00000000);
2728 WREG32((0x2c18 + j), 0x00000000);
2729 WREG32((0x2c1c + j), 0x00000000);
2730 WREG32((0x2c20 + j), 0x00000000);
2731 WREG32((0x2c24 + j), 0x00000000);
2732 }
2733
2734 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2735
2736 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2737
2738 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2739 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2740
2741 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2742 rdev->config.cik.mem_max_burst_length_bytes = 256;
2743 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2744 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2745 if (rdev->config.cik.mem_row_size_in_kb > 4)
2746 rdev->config.cik.mem_row_size_in_kb = 4;
2747 /* XXX use MC settings? */
2748 rdev->config.cik.shader_engine_tile_size = 32;
2749 rdev->config.cik.num_gpus = 1;
2750 rdev->config.cik.multi_gpu_tile_size = 64;
2751
2752 /* fix up row size */
2753 gb_addr_config &= ~ROW_SIZE_MASK;
2754 switch (rdev->config.cik.mem_row_size_in_kb) {
2755 case 1:
2756 default:
2757 gb_addr_config |= ROW_SIZE(0);
2758 break;
2759 case 2:
2760 gb_addr_config |= ROW_SIZE(1);
2761 break;
2762 case 4:
2763 gb_addr_config |= ROW_SIZE(2);
2764 break;
2765 }
2766
2767 /* setup tiling info dword. gb_addr_config is not adequate since it does
2768 * not have bank info, so create a custom tiling dword.
2769 * bits 3:0 num_pipes
2770 * bits 7:4 num_banks
2771 * bits 11:8 group_size
2772 * bits 15:12 row_size
2773 */
2774 rdev->config.cik.tile_config = 0;
2775 switch (rdev->config.cik.num_tile_pipes) {
2776 case 1:
2777 rdev->config.cik.tile_config |= (0 << 0);
2778 break;
2779 case 2:
2780 rdev->config.cik.tile_config |= (1 << 0);
2781 break;
2782 case 4:
2783 rdev->config.cik.tile_config |= (2 << 0);
2784 break;
2785 case 8:
2786 default:
2787 /* XXX what about 12? */
2788 rdev->config.cik.tile_config |= (3 << 0);
2789 break;
2790 }
2791 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2792 rdev->config.cik.tile_config |= 1 << 4;
2793 else
2794 rdev->config.cik.tile_config |= 0 << 4;
2795 rdev->config.cik.tile_config |=
2796 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2797 rdev->config.cik.tile_config |=
2798 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
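
/*
 * Worked example of the tiling dword, assuming 4 tile pipes, a
 * non-zero NOOFBANK field and zero PIPE_INTERLEAVE_SIZE/ROW_SIZE
 * fields in gb_addr_config: tile_config = (2 << 0) | (1 << 4) = 0x12,
 * matching the bit layout documented above.
 */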
2799
2800 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2801 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2802 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2803 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2804 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2805 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2806 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2807 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2808
2809 cik_tiling_mode_table_init(rdev);
2810
2811 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2812 rdev->config.cik.max_sh_per_se,
2813 rdev->config.cik.max_backends_per_se);
2814
2815 /* set HW defaults for 3D engine */
2816 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2817
2818 WREG32(SX_DEBUG_1, 0x20);
2819
2820 WREG32(TA_CNTL_AUX, 0x00010000);
2821
2822 tmp = RREG32(SPI_CONFIG_CNTL);
2823 tmp |= 0x03000000;
2824 WREG32(SPI_CONFIG_CNTL, tmp);
2825
2826 WREG32(SQ_CONFIG, 1);
2827
2828 WREG32(DB_DEBUG, 0);
2829
2830 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2831 tmp |= 0x00000400;
2832 WREG32(DB_DEBUG2, tmp);
2833
2834 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2835 tmp |= 0x00020200;
2836 WREG32(DB_DEBUG3, tmp);
2837
2838 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2839 tmp |= 0x00018208;
2840 WREG32(CB_HW_CONTROL, tmp);
2841
2842 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2843
2844 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2845 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2846 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2847 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2848
2849 WREG32(VGT_NUM_INSTANCES, 1);
2850
2851 WREG32(CP_PERFMON_CNTL, 0);
2852
2853 WREG32(SQ_CONFIG, 0);
2854
2855 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2856 FORCE_EOV_MAX_REZ_CNT(255)));
2857
2858 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2859 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2860
2861 WREG32(VGT_GS_VERTEX_REUSE, 16);
2862 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2863
2864 tmp = RREG32(HDP_MISC_CNTL);
2865 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2866 WREG32(HDP_MISC_CNTL, tmp);
2867
2868 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2869 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2870
2871 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2872 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2873
2874 udelay(50);
2875 }
2876
2877 /*
2878 * GPU scratch registers helper functions.
2879 */
2880 /**
2881 * cik_scratch_init - setup driver info for CP scratch regs
2882 *
2883 * @rdev: radeon_device pointer
2884 *
2885 * Set up the number and offset of the CP scratch registers.
2886 * NOTE: use of CP scratch registers is a legacy interface and
2887 * is not used by default on newer asics (r6xx+). On newer asics,
2888 * memory buffers are used for fences rather than scratch regs.
2889 */
2890 static void cik_scratch_init(struct radeon_device *rdev)
2891 {
2892 int i;
2893
2894 rdev->scratch.num_reg = 7;
2895 rdev->scratch.reg_base = SCRATCH_REG0;
2896 for (i = 0; i < rdev->scratch.num_reg; i++) {
2897 rdev->scratch.free[i] = true;
2898 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2899 }
2900 }
2901
2902 /**
2903 * cik_ring_test - basic gfx ring test
2904 *
2905 * @rdev: radeon_device pointer
2906 * @ring: radeon_ring structure holding ring information
2907 *
2908 * Allocate a scratch register and write to it using the gfx ring (CIK).
2909 * Provides a basic gfx ring test to verify that the ring is working.
2910 * Used by cik_cp_gfx_resume().
2911 * Returns 0 on success, error on failure.
2912 */
2913 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2914 {
2915 uint32_t scratch;
2916 uint32_t tmp = 0;
2917 unsigned i;
2918 int r;
2919
2920 r = radeon_scratch_get(rdev, &scratch);
2921 if (r) {
2922 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2923 return r;
2924 }
2925 WREG32(scratch, 0xCAFEDEAD);
2926 r = radeon_ring_lock(rdev, ring, 3);
2927 if (r) {
2928 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2929 radeon_scratch_free(rdev, scratch);
2930 return r;
2931 }
2932 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2933 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2934 radeon_ring_write(ring, 0xDEADBEEF);
2935 radeon_ring_unlock_commit(rdev, ring);
2936
2937 for (i = 0; i < rdev->usec_timeout; i++) {
2938 tmp = RREG32(scratch);
2939 if (tmp == 0xDEADBEEF)
2940 break;
2941 DRM_UDELAY(1);
2942 }
2943 if (i < rdev->usec_timeout) {
2944 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2945 } else {
2946 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2947 ring->idx, scratch, tmp);
2948 r = -EINVAL;
2949 }
2950 radeon_scratch_free(rdev, scratch);
2951 return r;
2952 }
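
/*
 * For reference, the three dwords committed by the test above form a
 * single type-3 packet (a count field of 1 encodes two payload
 * dwords):
 *
 *   dw0: PACKET3(PACKET3_SET_UCONFIG_REG, 1)
 *   dw1: (scratch - PACKET3_SET_UCONFIG_REG_START) >> 2
 *   dw2: 0xDEADBEEF
 */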
2953
2954 /**
2955 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2956 *
2957 * @rdev: radeon_device pointer
2958 * @fence: radeon fence object
2959 *
2960 * Emits a fence sequence number on the gfx ring and flushes
2961 * GPU caches.
2962 */
2963 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2964 struct radeon_fence *fence)
2965 {
2966 struct radeon_ring *ring = &rdev->ring[fence->ring];
2967 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2968
2969 /* EVENT_WRITE_EOP - flush caches, send int */
2970 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2971 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2972 EOP_TC_ACTION_EN |
2973 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2974 EVENT_INDEX(5)));
2975 radeon_ring_write(ring, addr & 0xfffffffc);
2976 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2977 radeon_ring_write(ring, fence->seq);
2978 radeon_ring_write(ring, 0);
2979 /* HDP flush */
2980 /* We should be using the new WAIT_REG_MEM special op packet here
2981 * but it causes the CP to hang
2982 */
2983 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2984 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2985 WRITE_DATA_DST_SEL(0)));
2986 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2987 radeon_ring_write(ring, 0);
2988 radeon_ring_write(ring, 0);
2989 }
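
/*
 * Layout sketch of the EVENT_WRITE_EOP packet emitted above: header,
 * event control dword, fence address low (dword aligned), address
 * high bits ORed with DATA_SEL(1)/INT_SEL(2) (presumably a 32-bit
 * data write plus an interrupt on completion), the fence sequence
 * number, and a trailing zero dword.
 */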
2990
2991 /**
2992 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2993 *
2994 * @rdev: radeon_device pointer
2995 * @fence: radeon fence object
2996 *
2997 * Emits a fence sequence number on the compute ring and flushes
2998 * GPU caches.
2999 */
3000 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3001 struct radeon_fence *fence)
3002 {
3003 struct radeon_ring *ring = &rdev->ring[fence->ring];
3004 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3005
3006 /* RELEASE_MEM - flush caches, send int */
3007 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3008 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3009 EOP_TC_ACTION_EN |
3010 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3011 EVENT_INDEX(5)));
3012 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3013 radeon_ring_write(ring, addr & 0xfffffffc);
3014 radeon_ring_write(ring, upper_32_bits(addr));
3015 radeon_ring_write(ring, fence->seq);
3016 radeon_ring_write(ring, 0);
3017 /* HDP flush */
3018 /* We should be using the new WAIT_REG_MEM special op packet here
3019 * but it causes the CP to hang
3020 */
3021 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3022 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3023 WRITE_DATA_DST_SEL(0)));
3024 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3025 radeon_ring_write(ring, 0);
3026 radeon_ring_write(ring, 0);
3027 }
3028
3029 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3030 struct radeon_ring *ring,
3031 struct radeon_semaphore *semaphore,
3032 bool emit_wait)
3033 {
3034 uint64_t addr = semaphore->gpu_addr;
3035 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3036
3037 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3038 radeon_ring_write(ring, addr & 0xffffffff);
3039 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3040 }
3041
3042 /*
3043 * IB stuff
3044 */
3045 /**
3046 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3047 *
3048 * @rdev: radeon_device pointer
3049 * @ib: radeon indirect buffer object
3050 *
3051 * Emits a DE (drawing engine) or CE (constant engine) IB
3052 * on the gfx ring. IBs are usually generated by userspace
3053 * acceleration drivers and submitted to the kernel for
3054 * scheduling on the ring. This function schedules the IB
3055 * on the gfx ring for execution by the GPU.
3056 */
3057 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3058 {
3059 struct radeon_ring *ring = &rdev->ring[ib->ring];
3060 u32 header, control = INDIRECT_BUFFER_VALID;
3061
3062 if (ib->is_const_ib) {
3063 /* set switch buffer packet before const IB */
3064 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3065 radeon_ring_write(ring, 0);
3066
3067 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3068 } else {
3069 u32 next_rptr;
3070 if (ring->rptr_save_reg) {
3071 next_rptr = ring->wptr + 3 + 4;
3072 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3073 radeon_ring_write(ring, ((ring->rptr_save_reg -
3074 PACKET3_SET_UCONFIG_REG_START) >> 2));
3075 radeon_ring_write(ring, next_rptr);
3076 } else if (rdev->wb.enabled) {
3077 next_rptr = ring->wptr + 5 + 4;
3078 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3079 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3080 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3081 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3082 radeon_ring_write(ring, next_rptr);
3083 }
3084
3085 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3086 }
3087
3088 control |= ib->length_dw |
3089 (ib->vm ? (ib->vm->id << 24) : 0);
3090
3091 radeon_ring_write(ring, header);
3092 radeon_ring_write(ring,
3093 #ifdef __BIG_ENDIAN
3094 (2 << 0) |
3095 #endif
3096 (ib->gpu_addr & 0xFFFFFFFC));
3097 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3098 radeon_ring_write(ring, control);
3099 }
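
/*
 * Example of the control dword built above, assuming a 64-dword IB
 * bound to VM id 3:
 *
 *   control = INDIRECT_BUFFER_VALID | 64 | (3 << 24);
 */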
3100
3101 /**
3102 * cik_ib_test - basic gfx ring IB test
3103 *
3104 * @rdev: radeon_device pointer
3105 * @ring: radeon_ring structure holding ring information
3106 *
3107 * Allocate an IB and execute it on the gfx ring (CIK).
3108 * Provides a basic gfx ring test to verify that IBs are working.
3109 * Returns 0 on success, error on failure.
3110 */
3111 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3112 {
3113 struct radeon_ib ib;
3114 uint32_t scratch;
3115 uint32_t tmp = 0;
3116 unsigned i;
3117 int r;
3118
3119 r = radeon_scratch_get(rdev, &scratch);
3120 if (r) {
3121 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3122 return r;
3123 }
3124 WREG32(scratch, 0xCAFEDEAD);
3125 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3126 if (r) {
3127 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3128 return r;
3129 }
3130 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3131 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3132 ib.ptr[2] = 0xDEADBEEF;
3133 ib.length_dw = 3;
3134 r = radeon_ib_schedule(rdev, &ib, NULL);
3135 if (r) {
3136 radeon_scratch_free(rdev, scratch);
3137 radeon_ib_free(rdev, &ib);
3138 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3139 return r;
3140 }
3141 r = radeon_fence_wait(ib.fence, false);
3142 if (r) {
3143 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3144 return r;
3145 }
3146 for (i = 0; i < rdev->usec_timeout; i++) {
3147 tmp = RREG32(scratch);
3148 if (tmp == 0xDEADBEEF)
3149 break;
3150 DRM_UDELAY(1);
3151 }
3152 if (i < rdev->usec_timeout) {
3153 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3154 } else {
3155 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3156 scratch, tmp);
3157 r = -EINVAL;
3158 }
3159 radeon_scratch_free(rdev, scratch);
3160 radeon_ib_free(rdev, &ib);
3161 return r;
3162 }
3163
3164 /*
3165 * CP.
3166 * On CIK, gfx and compute now have independent command processors.
3167 *
3168 * GFX
3169 * Gfx consists of a single ring and can process both gfx jobs and
3170 * compute jobs. The gfx CP consists of three microengines (ME):
3171 * PFP - Pre-Fetch Parser
3172 * ME - Micro Engine
3173 * CE - Constant Engine
3174 * The PFP and ME make up what is considered the Drawing Engine (DE).
3175 * The CE is an asynchronous engine used for updating buffer descriptors
3176 * used by the DE so that they can be loaded into cache in parallel
3177 * while the DE is processing state update packets.
3178 *
3179 * Compute
3180 * The compute CP consists of two microengines (ME):
3181 * MEC1 - Compute MicroEngine 1
3182 * MEC2 - Compute MicroEngine 2
3183 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3184 * The queues are exposed to userspace and are programmed directly
3185 * by the compute runtime.
3186 */
3187 /**
3188 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3189 *
3190 * @rdev: radeon_device pointer
3191 * @enable: enable or disable the MEs
3192 *
3193 * Halts or unhalts the gfx MEs.
3194 */
3195 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3196 {
3197 if (enable)
3198 WREG32(CP_ME_CNTL, 0);
3199 else {
3200 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3201 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3202 }
3203 udelay(50);
3204 }
3205
3206 /**
3207 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3208 *
3209 * @rdev: radeon_device pointer
3210 *
3211 * Loads the gfx PFP, ME, and CE ucode.
3212 * Returns 0 for success, -EINVAL if the ucode is not available.
3213 */
3214 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3215 {
3216 const __be32 *fw_data;
3217 int i;
3218
3219 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3220 return -EINVAL;
3221
3222 cik_cp_gfx_enable(rdev, false);
3223
3224 /* PFP */
3225 fw_data = (const __be32 *)rdev->pfp_fw->data;
3226 WREG32(CP_PFP_UCODE_ADDR, 0);
3227 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3228 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3229 WREG32(CP_PFP_UCODE_ADDR, 0);
3230
3231 /* CE */
3232 fw_data = (const __be32 *)rdev->ce_fw->data;
3233 WREG32(CP_CE_UCODE_ADDR, 0);
3234 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3235 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3236 WREG32(CP_CE_UCODE_ADDR, 0);
3237
3238 /* ME */
3239 fw_data = (const __be32 *)rdev->me_fw->data;
3240 WREG32(CP_ME_RAM_WADDR, 0);
3241 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3242 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3243 WREG32(CP_ME_RAM_WADDR, 0);
3244
3245 WREG32(CP_PFP_UCODE_ADDR, 0);
3246 WREG32(CP_CE_UCODE_ADDR, 0);
3247 WREG32(CP_ME_RAM_WADDR, 0);
3248 WREG32(CP_ME_RAM_RADDR, 0);
3249 return 0;
3250 }
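
/*
 * The three loads above follow the usual radeon ucode pattern: zero
 * the address register, stream big-endian dwords into the DATA
 * register (the address presumably auto-increments), then zero the
 * address again so the engine starts fetching from instruction 0.
 */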
3251
3252 /**
3253 * cik_cp_gfx_start - start the gfx ring
3254 *
3255 * @rdev: radeon_device pointer
3256 *
3257 * Enables the ring and loads the clear state context and other
3258 * packets required to init the ring.
3259 * Returns 0 for success, error for failure.
3260 */
3261 static int cik_cp_gfx_start(struct radeon_device *rdev)
3262 {
3263 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3264 int r, i;
3265
3266 /* init the CP */
3267 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3268 WREG32(CP_ENDIAN_SWAP, 0);
3269 WREG32(CP_DEVICE_ID, 1);
3270
3271 cik_cp_gfx_enable(rdev, true);
3272
3273 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3274 if (r) {
3275 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3276 return r;
3277 }
3278
3279 /* init the CE partitions. CE only used for gfx on CIK */
3280 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3281 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3282 radeon_ring_write(ring, 0xc000);
3283 radeon_ring_write(ring, 0xc000);
3284
3285 /* setup clear context state */
3286 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3287 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3288
3289 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3290 radeon_ring_write(ring, 0x80000000);
3291 radeon_ring_write(ring, 0x80000000);
3292
3293 for (i = 0; i < cik_default_size; i++)
3294 radeon_ring_write(ring, cik_default_state[i]);
3295
3296 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3297 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3298
3299 /* set clear context state */
3300 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3301 radeon_ring_write(ring, 0);
3302
3303 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3304 radeon_ring_write(ring, 0x00000316);
3305 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3306 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3307
3308 radeon_ring_unlock_commit(rdev, ring);
3309
3310 return 0;
3311 }
3312
3313 /**
3314 * cik_cp_gfx_fini - stop the gfx ring
3315 *
3316 * @rdev: radeon_device pointer
3317 *
3318 * Stop the gfx ring and tear down the driver ring
3319 * info.
3320 */
3321 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3322 {
3323 cik_cp_gfx_enable(rdev, false);
3324 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3325 }
3326
3327 /**
3328 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3329 *
3330 * @rdev: radeon_device pointer
3331 *
3332 * Program the location and size of the gfx ring buffer
3333 * and test it to make sure it's working.
3334 * Returns 0 for success, error for failure.
3335 */
3336 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3337 {
3338 struct radeon_ring *ring;
3339 u32 tmp;
3340 u32 rb_bufsz;
3341 u64 rb_addr;
3342 int r;
3343
3344 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3345 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3346
3347 /* Set the write pointer delay */
3348 WREG32(CP_RB_WPTR_DELAY, 0);
3349
3350 /* set the RB to use vmid 0 */
3351 WREG32(CP_RB_VMID, 0);
3352
3353 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3354
3355 /* ring 0 - compute and gfx */
3356 /* Set ring buffer size */
3357 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3358 rb_bufsz = drm_order(ring->ring_size / 8);
3359 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3360 #ifdef __BIG_ENDIAN
3361 tmp |= BUF_SWAP_32BIT;
3362 #endif
3363 WREG32(CP_RB0_CNTL, tmp);
3364
3365 /* Initialize the ring buffer's read and write pointers */
3366 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3367 ring->wptr = 0;
3368 WREG32(CP_RB0_WPTR, ring->wptr);
3369
3370 /* set the wb address whether it's enabled or not */
3371 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3372 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3373
3374 /* scratch register shadowing is no longer supported */
3375 WREG32(SCRATCH_UMSK, 0);
3376
3377 if (!rdev->wb.enabled)
3378 tmp |= RB_NO_UPDATE;
3379
3380 mdelay(1);
3381 WREG32(CP_RB0_CNTL, tmp);
3382
3383 rb_addr = ring->gpu_addr >> 8;
3384 WREG32(CP_RB0_BASE, rb_addr);
3385 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3386
3387 ring->rptr = RREG32(CP_RB0_RPTR);
3388
3389 /* start the ring */
3390 cik_cp_gfx_start(rdev);
3391 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3392 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3393 if (r) {
3394 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3395 return r;
3396 }
3397 return 0;
3398 }
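
/*
 * Sizing example for CP_RB0_CNTL above, assuming a 1 MB ring and
 * 4 KB GPU pages: rb_bufsz = drm_order(1048576 / 8) = 17 and the
 * rptr writeback block field is drm_order(4096 / 8) = 9.
 */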
3399
3400 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3401 struct radeon_ring *ring)
3402 {
3403 u32 rptr;
3404
3405
3407 if (rdev->wb.enabled) {
3408 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3409 } else {
3410 mutex_lock(&rdev->srbm_mutex);
3411 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3412 rptr = RREG32(CP_HQD_PQ_RPTR);
3413 cik_srbm_select(rdev, 0, 0, 0, 0);
3414 mutex_unlock(&rdev->srbm_mutex);
3415 }
3416 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3417
3418 return rptr;
3419 }
3420
3421 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3422 struct radeon_ring *ring)
3423 {
3424 u32 wptr;
3425
3426 if (rdev->wb.enabled) {
3427 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3428 } else {
3429 mutex_lock(&rdev->srbm_mutex);
3430 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3431 wptr = RREG32(CP_HQD_PQ_WPTR);
3432 cik_srbm_select(rdev, 0, 0, 0, 0);
3433 mutex_unlock(&rdev->srbm_mutex);
3434 }
3435 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
3436
3437 return wptr;
3438 }
3439
3440 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3441 struct radeon_ring *ring)
3442 {
3443 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
3444
3445 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
3446 WDOORBELL32(ring->doorbell_offset, wptr);
3447 }
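
/*
 * Note: the new wptr is mirrored into the writeback page first so
 * the value the CP fetches is consistent, then the doorbell write
 * kicks the hardware queue without touching the per-queue HQD
 * registers directly.
 */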
3448
3449 /**
3450 * cik_cp_compute_enable - enable/disable the compute CP MEs
3451 *
3452 * @rdev: radeon_device pointer
3453 * @enable: enable or disable the MEs
3454 *
3455 * Halts or unhalts the compute MEs.
3456 */
3457 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3458 {
3459 if (enable)
3460 WREG32(CP_MEC_CNTL, 0);
3461 else
3462 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3463 udelay(50);
3464 }
3465
3466 /**
3467 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3468 *
3469 * @rdev: radeon_device pointer
3470 *
3471 * Loads the compute MEC1&2 ucode.
3472 * Returns 0 for success, -EINVAL if the ucode is not available.
3473 */
3474 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3475 {
3476 const __be32 *fw_data;
3477 int i;
3478
3479 if (!rdev->mec_fw)
3480 return -EINVAL;
3481
3482 cik_cp_compute_enable(rdev, false);
3483
3484 /* MEC1 */
3485 fw_data = (const __be32 *)rdev->mec_fw->data;
3486 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3487 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3488 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3489 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3490
3491 if (rdev->family == CHIP_KAVERI) {
3492 /* MEC2 */
3493 fw_data = (const __be32 *)rdev->mec_fw->data;
3494 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3495 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3496 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3497 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3498 }
3499
3500 return 0;
3501 }
3502
3503 /**
3504 * cik_cp_compute_start - start the compute queues
3505 *
3506 * @rdev: radeon_device pointer
3507 *
3508 * Enable the compute queues.
3509 * Returns 0 for success, error for failure.
3510 */
3511 static int cik_cp_compute_start(struct radeon_device *rdev)
3512 {
3513 cik_cp_compute_enable(rdev, true);
3514
3515 return 0;
3516 }
3517
3518 /**
3519 * cik_cp_compute_fini - stop the compute queues
3520 *
3521 * @rdev: radeon_device pointer
3522 *
3523 * Stop the compute queues and tear down the driver queue
3524 * info.
3525 */
3526 static void cik_cp_compute_fini(struct radeon_device *rdev)
3527 {
3528 int i, idx, r;
3529
3530 cik_cp_compute_enable(rdev, false);
3531
3532 for (i = 0; i < 2; i++) {
3533 if (i == 0)
3534 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3535 else
3536 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3537
3538 if (rdev->ring[idx].mqd_obj) {
3539 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3540 if (unlikely(r != 0))
3541 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3542
3543 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3544 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3545
3546 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3547 rdev->ring[idx].mqd_obj = NULL;
3548 }
3549 }
3550 }
3551
3552 static void cik_mec_fini(struct radeon_device *rdev)
3553 {
3554 int r;
3555
3556 if (rdev->mec.hpd_eop_obj) {
3557 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3558 if (unlikely(r != 0))
3559 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3560 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3561 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3562
3563 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3564 rdev->mec.hpd_eop_obj = NULL;
3565 }
3566 }
3567
3568 #define MEC_HPD_SIZE 2048
3569
3570 static int cik_mec_init(struct radeon_device *rdev)
3571 {
3572 int r;
3573 u32 *hpd;
3574
3575 /*
3576 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3577 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3578 */
3579 if (rdev->family == CHIP_KAVERI)
3580 rdev->mec.num_mec = 2;
3581 else
3582 rdev->mec.num_mec = 1;
3583 rdev->mec.num_pipe = 4;
3584 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3585
3586 if (rdev->mec.hpd_eop_obj == NULL) {
3587 r = radeon_bo_create(rdev,
3588 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3589 PAGE_SIZE, true,
3590 RADEON_GEM_DOMAIN_GTT, NULL,
3591 &rdev->mec.hpd_eop_obj);
3592 if (r) {
3593 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3594 return r;
3595 }
3596 }
3597
3598 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3599 if (unlikely(r != 0)) {
3600 cik_mec_fini(rdev);
3601 return r;
3602 }
3603 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3604 &rdev->mec.hpd_eop_gpu_addr);
3605 if (r) {
3606 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3607 cik_mec_fini(rdev);
3608 return r;
3609 }
3610 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3611 if (r) {
3612 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3613 cik_mec_fini(rdev);
3614 return r;
3615 }
3616
3617 /* clear memory. Not sure if this is required or not */
3618 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3619
3620 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3621 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3622
3623 return 0;
3624 }
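/*
 * Worked example (added for clarity): with MEC_HPD_SIZE = 2048 bytes
 * per pipe and the extra factor of 2 above, Kaveri (2 MECs with 4
 * pipes each) allocates 2 * 4 * 2048 * 2 = 32 KiB of HPD EOP space,
 * while Bonaire/Kabini (1 MEC) allocate 16 KiB, pinned in GTT and
 * cleared before use.
 */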
3625
3626 struct hqd_registers
3627 {
3628 u32 cp_mqd_base_addr;
3629 u32 cp_mqd_base_addr_hi;
3630 u32 cp_hqd_active;
3631 u32 cp_hqd_vmid;
3632 u32 cp_hqd_persistent_state;
3633 u32 cp_hqd_pipe_priority;
3634 u32 cp_hqd_queue_priority;
3635 u32 cp_hqd_quantum;
3636 u32 cp_hqd_pq_base;
3637 u32 cp_hqd_pq_base_hi;
3638 u32 cp_hqd_pq_rptr;
3639 u32 cp_hqd_pq_rptr_report_addr;
3640 u32 cp_hqd_pq_rptr_report_addr_hi;
3641 u32 cp_hqd_pq_wptr_poll_addr;
3642 u32 cp_hqd_pq_wptr_poll_addr_hi;
3643 u32 cp_hqd_pq_doorbell_control;
3644 u32 cp_hqd_pq_wptr;
3645 u32 cp_hqd_pq_control;
3646 u32 cp_hqd_ib_base_addr;
3647 u32 cp_hqd_ib_base_addr_hi;
3648 u32 cp_hqd_ib_rptr;
3649 u32 cp_hqd_ib_control;
3650 u32 cp_hqd_iq_timer;
3651 u32 cp_hqd_iq_rptr;
3652 u32 cp_hqd_dequeue_request;
3653 u32 cp_hqd_dma_offload;
3654 u32 cp_hqd_sema_cmd;
3655 u32 cp_hqd_msg_type;
3656 u32 cp_hqd_atomic0_preop_lo;
3657 u32 cp_hqd_atomic0_preop_hi;
3658 u32 cp_hqd_atomic1_preop_lo;
3659 u32 cp_hqd_atomic1_preop_hi;
3660 u32 cp_hqd_hq_scheduler0;
3661 u32 cp_hqd_hq_scheduler1;
3662 u32 cp_mqd_control;
3663 };
3664
3665 struct bonaire_mqd
3666 {
3667 u32 header;
3668 u32 dispatch_initiator;
3669 u32 dimensions[3];
3670 u32 start_idx[3];
3671 u32 num_threads[3];
3672 u32 pipeline_stat_enable;
3673 u32 perf_counter_enable;
3674 u32 pgm[2];
3675 u32 tba[2];
3676 u32 tma[2];
3677 u32 pgm_rsrc[2];
3678 u32 vmid;
3679 u32 resource_limits;
3680 u32 static_thread_mgmt01[2];
3681 u32 tmp_ring_size;
3682 u32 static_thread_mgmt23[2];
3683 u32 restart[3];
3684 u32 thread_trace_enable;
3685 u32 reserved1;
3686 u32 user_data[16];
3687 u32 vgtcs_invoke_count[2];
3688 struct hqd_registers queue_state;
3689 u32 dequeue_cntr;
3690 u32 interrupt_queue[64];
3691 };
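/*
 * Note (added for clarity; a reading of the structures above, not
 * authoritative hw documentation): bonaire_mqd is the memory queue
 * descriptor the CP fetches for a compute queue.  queue_state shadows
 * the CP_HQD register block programmed in cik_cp_compute_resume()
 * below, while the surrounding fields (dispatch dimensions, static
 * thread management masks, user data) hold the dispatch state the
 * hardware can save and restore when switching queues.
 */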
3692
3693 /**
3694 * cik_cp_compute_resume - setup the compute queue registers
3695 *
3696 * @rdev: radeon_device pointer
3697 *
3698 * Program the compute queues and test them to make sure they
3699 * are working.
3700 * Returns 0 for success, error for failure.
3701 */
3702 static int cik_cp_compute_resume(struct radeon_device *rdev)
3703 {
3704 int r, i, j, idx;
3705 u32 tmp;
3706 bool use_doorbell = true;
3707 u64 hqd_gpu_addr;
3708 u64 mqd_gpu_addr;
3709 u64 eop_gpu_addr;
3710 u64 wb_gpu_addr;
3711 u32 *buf;
3712 struct bonaire_mqd *mqd;
3713
3714 r = cik_cp_compute_start(rdev);
3715 if (r)
3716 return r;
3717
3718 /* fix up chicken bits */
3719 tmp = RREG32(CP_CPF_DEBUG);
3720 tmp |= (1 << 23);
3721 WREG32(CP_CPF_DEBUG, tmp);
3722
3723 /* init the pipes */
3724 mutex_lock(&rdev->srbm_mutex);
3725 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3726 int me = (i < 4) ? 1 : 2;
3727 int pipe = (i < 4) ? i : (i - 4);
3728
3729 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3730
3731 cik_srbm_select(rdev, me, pipe, 0, 0);
3732
3733 /* write the EOP addr */
3734 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3735 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3736
3737 /* set the VMID assigned */
3738 WREG32(CP_HPD_EOP_VMID, 0);
3739
3740 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3741 tmp = RREG32(CP_HPD_EOP_CONTROL);
3742 tmp &= ~EOP_SIZE_MASK;
3743 tmp |= drm_order(MEC_HPD_SIZE / 8);
3744 WREG32(CP_HPD_EOP_CONTROL, tmp);
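/* Worked example (added for clarity): MEC_HPD_SIZE is 2048 bytes,
 * i.e. 512 dwords.  MEC_HPD_SIZE / 8 = 256 and drm_order(256) = 8,
 * so the hardware decodes EOP_SIZE as 2^(8+1) = 512 dwords -- half
 * of the MEC_HPD_SIZE * 2 slot reserved per pipe above.
 */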
3745 }
3746 cik_srbm_select(rdev, 0, 0, 0, 0);
3747 mutex_unlock(&rdev->srbm_mutex);
3748
3749 /* init the queues. Just two for now. */
3750 for (i = 0; i < 2; i++) {
3751 if (i == 0)
3752 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3753 else
3754 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3755
3756 if (rdev->ring[idx].mqd_obj == NULL) {
3757 r = radeon_bo_create(rdev,
3758 sizeof(struct bonaire_mqd),
3759 PAGE_SIZE, true,
3760 RADEON_GEM_DOMAIN_GTT, NULL,
3761 &rdev->ring[idx].mqd_obj);
3762 if (r) {
3763 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3764 return r;
3765 }
3766 }
3767
3768 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3769 if (unlikely(r != 0)) {
3770 cik_cp_compute_fini(rdev);
3771 return r;
3772 }
3773 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3774 &mqd_gpu_addr);
3775 if (r) {
3776 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3777 cik_cp_compute_fini(rdev);
3778 return r;
3779 }
3780 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3781 if (r) {
3782 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3783 cik_cp_compute_fini(rdev);
3784 return r;
3785 }
3786
3787 /* doorbell offset */
3788 rdev->ring[idx].doorbell_offset =
3789 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3790
3791 /* init the mqd struct */
3792 memset(buf, 0, sizeof(struct bonaire_mqd));
3793
3794 mqd = (struct bonaire_mqd *)buf;
3795 mqd->header = 0xC0310800;
3796 mqd->static_thread_mgmt01[0] = 0xffffffff;
3797 mqd->static_thread_mgmt01[1] = 0xffffffff;
3798 mqd->static_thread_mgmt23[0] = 0xffffffff;
3799 mqd->static_thread_mgmt23[1] = 0xffffffff;
3800
3801 mutex_lock(&rdev->srbm_mutex);
3802 cik_srbm_select(rdev, rdev->ring[idx].me,
3803 rdev->ring[idx].pipe,
3804 rdev->ring[idx].queue, 0);
3805
3806 /* disable wptr polling */
3807 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3808 tmp &= ~WPTR_POLL_EN;
3809 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3810
3811 /* enable doorbell? */
3812 mqd->queue_state.cp_hqd_pq_doorbell_control =
3813 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3814 if (use_doorbell)
3815 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3816 else
3817 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3818 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3819 mqd->queue_state.cp_hqd_pq_doorbell_control);
3820
3821 /* disable the queue if it's active */
3822 mqd->queue_state.cp_hqd_dequeue_request = 0;
3823 mqd->queue_state.cp_hqd_pq_rptr = 0;
3824 mqd->queue_state.cp_hqd_pq_wptr = 0;
3825 if (RREG32(CP_HQD_ACTIVE) & 1) {
3826 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3827 for (j = 0; j < rdev->usec_timeout; j++) {
3828 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3829 break;
3830 udelay(1);
3831 }
3832 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3833 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3834 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3835 }
3836
3837 /* set the pointer to the MQD */
3838 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3839 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3840 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3841 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3842 /* set MQD vmid to 0 */
3843 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3844 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3845 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3846
3847 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3848 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3849 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3850 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3851 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3852 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3853
3854 /* set up the HQD, this is similar to CP_RB0_CNTL */
3855 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3856 mqd->queue_state.cp_hqd_pq_control &=
3857 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3858
3859 mqd->queue_state.cp_hqd_pq_control |=
3860 drm_order(rdev->ring[idx].ring_size / 8);
3861 mqd->queue_state.cp_hqd_pq_control |=
3862 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3863 #ifdef __BIG_ENDIAN
3864 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3865 #endif
3866 mqd->queue_state.cp_hqd_pq_control &=
3867 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3868 mqd->queue_state.cp_hqd_pq_control |=
3869 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3870 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3871
3872 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3873 if (i == 0)
3874 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3875 else
3876 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3877 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3878 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3879 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3880 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3881 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3882
3883 /* set the wb address whether it's enabled or not */
3884 if (i == 0)
3885 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3886 else
3887 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3888 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3889 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3890 upper_32_bits(wb_gpu_addr) & 0xffff;
3891 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3892 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3893 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3894 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3895
3896 /* enable the doorbell if requested */
3897 if (use_doorbell) {
3898 mqd->queue_state.cp_hqd_pq_doorbell_control =
3899 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3900 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3901 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3902 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3903 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3904 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3905 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3906
3907 } else {
3908 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3909 }
3910 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3911 mqd->queue_state.cp_hqd_pq_doorbell_control);
3912
3913 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3914 rdev->ring[idx].wptr = 0;
3915 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3916 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3917 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3918 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3919
3920 /* set the vmid for the queue */
3921 mqd->queue_state.cp_hqd_vmid = 0;
3922 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3923
3924 /* activate the queue */
3925 mqd->queue_state.cp_hqd_active = 1;
3926 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3927
3928 cik_srbm_select(rdev, 0, 0, 0, 0);
3929 mutex_unlock(&rdev->srbm_mutex);
3930
3931 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3932 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3933
3934 rdev->ring[idx].ready = true;
3935 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3936 if (r)
3937 rdev->ring[idx].ready = false;
3938 }
3939
3940 return 0;
3941 }
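/*
 * Summary (added for clarity; inferred from the code above): for each
 * compute queue we build an MQD in a GTT bo, select the target
 * me/pipe/queue via SRBM, quiesce any active queue with a dequeue
 * request, point CP_MQD_BASE_ADDR at the MQD, program the ring (PQ)
 * base/size, rptr report address and doorbell state into both the
 * registers and the MQD shadow, and finally set CP_HQD_ACTIVE so the
 * hardware starts fetching from the queue.
 */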
3942
3943 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3944 {
3945 cik_cp_gfx_enable(rdev, enable);
3946 cik_cp_compute_enable(rdev, enable);
3947 }
3948
3949 static int cik_cp_load_microcode(struct radeon_device *rdev)
3950 {
3951 int r;
3952
3953 r = cik_cp_gfx_load_microcode(rdev);
3954 if (r)
3955 return r;
3956 r = cik_cp_compute_load_microcode(rdev);
3957 if (r)
3958 return r;
3959
3960 return 0;
3961 }
3962
3963 static void cik_cp_fini(struct radeon_device *rdev)
3964 {
3965 cik_cp_gfx_fini(rdev);
3966 cik_cp_compute_fini(rdev);
3967 }
3968
3969 static int cik_cp_resume(struct radeon_device *rdev)
3970 {
3971 int r;
3972
3973 /* Reset all cp blocks */
3974 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3975 RREG32(GRBM_SOFT_RESET);
3976 mdelay(15);
3977 WREG32(GRBM_SOFT_RESET, 0);
3978 RREG32(GRBM_SOFT_RESET);
3979
3980 r = cik_cp_load_microcode(rdev);
3981 if (r)
3982 return r;
3983
3984 r = cik_cp_gfx_resume(rdev);
3985 if (r)
3986 return r;
3987 r = cik_cp_compute_resume(rdev);
3988 if (r)
3989 return r;
3990
3991 return 0;
3992 }
3993
3994 /*
3995 * sDMA - System DMA
3996 * Starting with CIK, the GPU has new asynchronous
3997 * DMA engines. These engines are used for compute
3998 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3999 * and each one supports 1 ring buffer used for gfx
4000 * and 2 queues used for compute.
4001 *
4002 * The programming model is very similar to the CP
4003 * (ring buffer, IBs, etc.), but sDMA has its own
4004 * packet format that is different from the PM4 format
4005 * used by the CP. sDMA supports copying data, writing
4006 * embedded data, solid fills, and a number of other
4007 * things. It also has support for tiling/detiling of
4008 * buffers.
4009 */
4010 /**
4011 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
4012 *
4013 * @rdev: radeon_device pointer
4014 * @ib: IB object to schedule
4015 *
4016 * Schedule an IB in the DMA ring (CIK).
4017 */
4018 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
4019 struct radeon_ib *ib)
4020 {
4021 struct radeon_ring *ring = &rdev->ring[ib->ring];
4022 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
4023
4024 if (rdev->wb.enabled) {
4025 u32 next_rptr = ring->wptr + 5;
4026 while ((next_rptr & 7) != 4)
4027 next_rptr++;
4028 next_rptr += 4;
4029 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4030 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4031 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4032 radeon_ring_write(ring, 1); /* number of DWs to follow */
4033 radeon_ring_write(ring, next_rptr);
4034 }
4035
4036 /* IB packet must end on an 8 DW boundary */
4037 while ((ring->wptr & 7) != 4)
4038 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4039 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
4040 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
4041 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
4042 radeon_ring_write(ring, ib->length_dw);
4043
4044 }
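/*
 * Worked example (added for clarity): an INDIRECT_BUFFER packet is 4
 * dwords (header, base lo, base hi, size), so padding with NOPs until
 * (wptr & 7) == 4 makes the packet end exactly on an 8 DW boundary as
 * required.  The optional next_rptr write above is 5 dwords, hence
 * the "+ 5" before the same alignment computation.
 */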
4045
4046 /**
4047 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
4048 *
4049 * @rdev: radeon_device pointer
4050 * @fence: radeon fence object
4051 *
4052 * Add a DMA fence packet to the ring to write
4053 * the fence seq number and a DMA trap packet to generate
4054 * an interrupt if needed (CIK).
4055 */
4056 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4057 struct radeon_fence *fence)
4058 {
4059 struct radeon_ring *ring = &rdev->ring[fence->ring];
4060 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4061 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4062 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4063 u32 ref_and_mask;
4064
4065 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4066 ref_and_mask = SDMA0;
4067 else
4068 ref_and_mask = SDMA1;
4069
4070 /* write the fence */
4071 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4072 radeon_ring_write(ring, addr & 0xffffffff);
4073 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4074 radeon_ring_write(ring, fence->seq);
4075 /* generate an interrupt */
4076 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4077 /* flush HDP */
4078 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4079 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4080 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4081 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4082 radeon_ring_write(ring, ref_and_mask); /* MASK */
4083 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4084 }
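/*
 * Note (added for clarity; inferred from the code above): the trailing
 * POLL_REG_MEM packet doubles as an HDP flush.  It pairs
 * GPU_HDP_FLUSH_DONE/GPU_HDP_FLUSH_REQ with the per-engine SDMA0 or
 * SDMA1 ref/mask bit and an equality compare (EXTRA_FUNC(3)), so the
 * engine stalls until the flush it kicked off has completed.  The
 * final dword packs RETRY_COUNT = 4 and POLL_INTERVAL = 10.
 */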
4085
4086 /**
4087 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4088 *
4089 * @rdev: radeon_device pointer
4090 * @ring: radeon_ring structure holding ring information
4091 * @semaphore: radeon semaphore object
4092 * @emit_wait: wait or signal semaphore
4093 *
4094 * Add a DMA semaphore packet to the ring to wait on or signal
4095 * other rings (CIK).
4096 */
4097 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4098 struct radeon_ring *ring,
4099 struct radeon_semaphore *semaphore,
4100 bool emit_wait)
4101 {
4102 u64 addr = semaphore->gpu_addr;
4103 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4104
4105 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4106 radeon_ring_write(ring, addr & 0xfffffff8);
4107 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4108 }
4109
4110 /**
4111 * cik_sdma_gfx_stop - stop the gfx async dma engines
4112 *
4113 * @rdev: radeon_device pointer
4114 *
4115 * Stop the gfx async dma ring buffers (CIK).
4116 */
4117 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4118 {
4119 u32 rb_cntl, reg_offset;
4120 int i;
4121
4122 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4123
4124 for (i = 0; i < 2; i++) {
4125 if (i == 0)
4126 reg_offset = SDMA0_REGISTER_OFFSET;
4127 else
4128 reg_offset = SDMA1_REGISTER_OFFSET;
4129 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4130 rb_cntl &= ~SDMA_RB_ENABLE;
4131 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4132 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4133 }
4134 }
4135
4136 /**
4137 * cik_sdma_rlc_stop - stop the compute async dma engines
4138 *
4139 * @rdev: radeon_device pointer
4140 *
4141 * Stop the compute async dma queues (CIK).
4142 */
4143 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4144 {
4145 /* XXX todo */
4146 }
4147
4148 /**
4149 * cik_sdma_enable - halt or unhalt the async dma engines
4150 *
4151 * @rdev: radeon_device pointer
4152 * @enable: enable/disable the DMA MEs.
4153 *
4154 * Halt or unhalt the async dma engines (CIK).
4155 */
4156 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4157 {
4158 u32 me_cntl, reg_offset;
4159 int i;
4160
4161 for (i = 0; i < 2; i++) {
4162 if (i == 0)
4163 reg_offset = SDMA0_REGISTER_OFFSET;
4164 else
4165 reg_offset = SDMA1_REGISTER_OFFSET;
4166 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4167 if (enable)
4168 me_cntl &= ~SDMA_HALT;
4169 else
4170 me_cntl |= SDMA_HALT;
4171 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4172 }
4173 }
4174
4175 /**
4176 * cik_sdma_gfx_resume - setup and start the async dma engines
4177 *
4178 * @rdev: radeon_device pointer
4179 *
4180 * Set up the gfx DMA ring buffers and enable them (CIK).
4181 * Returns 0 for success, error for failure.
4182 */
4183 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4184 {
4185 struct radeon_ring *ring;
4186 u32 rb_cntl, ib_cntl;
4187 u32 rb_bufsz;
4188 u32 reg_offset, wb_offset;
4189 int i, r;
4190
4191 for (i = 0; i < 2; i++) {
4192 if (i == 0) {
4193 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4194 reg_offset = SDMA0_REGISTER_OFFSET;
4195 wb_offset = R600_WB_DMA_RPTR_OFFSET;
4196 } else {
4197 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4198 reg_offset = SDMA1_REGISTER_OFFSET;
4199 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4200 }
4201
4202 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4203 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4204
4205 /* Set ring buffer size (log2 of the size in dwords) */
4206 rb_bufsz = drm_order(ring->ring_size / 4);
4207 rb_cntl = rb_bufsz << 1;
4208 #ifdef __BIG_ENDIAN
4209 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4210 #endif
4211 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4212
4213 /* Initialize the ring buffer's read and write pointers */
4214 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4215 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4216
4217 /* set the wb address whether it's enabled or not */
4218 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4219 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4220 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4221 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4222
4223 if (rdev->wb.enabled)
4224 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4225
4226 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4227 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4228
4229 ring->wptr = 0;
4230 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4231
4232 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4233
4234 /* enable DMA RB */
4235 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4236
4237 ib_cntl = SDMA_IB_ENABLE;
4238 #ifdef __BIG_ENDIAN
4239 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4240 #endif
4241 /* enable DMA IBs */
4242 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4243
4244 ring->ready = true;
4245
4246 r = radeon_ring_test(rdev, ring->idx, ring);
4247 if (r) {
4248 ring->ready = false;
4249 return r;
4250 }
4251 }
4252
4253 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4254
4255 return 0;
4256 }
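/*
 * Worked example (added for clarity, with a hypothetical ring size):
 * for a 256 KiB gfx ring, ring_size / 4 = 65536 dwords and
 * drm_order(65536) = 16, so the size field of SDMA0_GFX_RB_CNTL gets
 * 16 << 1; SDMA_RB_ENABLE is OR'ed in last, once the rptr/wptr and
 * writeback state have been programmed.
 */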
4257
4258 /**
4259 * cik_sdma_rlc_resume - setup and start the async dma engines
4260 *
4261 * @rdev: radeon_device pointer
4262 *
4263 * Set up the compute DMA queues and enable them (CIK).
4264 * Returns 0 for success, error for failure.
4265 */
4266 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4267 {
4268 /* XXX todo */
4269 return 0;
4270 }
4271
4272 /**
4273 * cik_sdma_load_microcode - load the sDMA ME ucode
4274 *
4275 * @rdev: radeon_device pointer
4276 *
4277 * Loads the sDMA0/1 ucode.
4278 * Returns 0 for success, -EINVAL if the ucode is not available.
4279 */
4280 static int cik_sdma_load_microcode(struct radeon_device *rdev)
4281 {
4282 const __be32 *fw_data;
4283 int i;
4284
4285 if (!rdev->sdma_fw)
4286 return -EINVAL;
4287
4288 /* stop the gfx rings and rlc compute queues */
4289 cik_sdma_gfx_stop(rdev);
4290 cik_sdma_rlc_stop(rdev);
4291
4292 /* halt the MEs */
4293 cik_sdma_enable(rdev, false);
4294
4295 /* sdma0 */
4296 fw_data = (const __be32 *)rdev->sdma_fw->data;
4297 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4298 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4299 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4300 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4301
4302 /* sdma1 */
4303 fw_data = (const __be32 *)rdev->sdma_fw->data;
4304 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4305 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4306 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4307 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4308
4309 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4310 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4311 return 0;
4312 }
4313
4314 /**
4315 * cik_sdma_resume - setup and start the async dma engines
4316 *
4317 * @rdev: radeon_device pointer
4318 *
4319 * Set up the DMA engines and enable them (CIK).
4320 * Returns 0 for success, error for failure.
4321 */
4322 static int cik_sdma_resume(struct radeon_device *rdev)
4323 {
4324 int r;
4325
4326 /* Reset dma */
4327 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4328 RREG32(SRBM_SOFT_RESET);
4329 udelay(50);
4330 WREG32(SRBM_SOFT_RESET, 0);
4331 RREG32(SRBM_SOFT_RESET);
4332
4333 r = cik_sdma_load_microcode(rdev);
4334 if (r)
4335 return r;
4336
4337 /* unhalt the MEs */
4338 cik_sdma_enable(rdev, true);
4339
4340 /* start the gfx rings and rlc compute queues */
4341 r = cik_sdma_gfx_resume(rdev);
4342 if (r)
4343 return r;
4344 r = cik_sdma_rlc_resume(rdev);
4345 if (r)
4346 return r;
4347
4348 return 0;
4349 }
4350
4351 /**
4352 * cik_sdma_fini - tear down the async dma engines
4353 *
4354 * @rdev: radeon_device pointer
4355 *
4356 * Stop the async dma engines and free the rings (CIK).
4357 */
4358 static void cik_sdma_fini(struct radeon_device *rdev)
4359 {
4360 /* stop the gfx rings and rlc compute queues */
4361 cik_sdma_gfx_stop(rdev);
4362 cik_sdma_rlc_stop(rdev);
4363 /* halt the MEs */
4364 cik_sdma_enable(rdev, false);
4365 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4366 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4367 /* XXX - compute dma queue tear down */
4368 }
4369
4370 /**
4371 * cik_copy_dma - copy pages using the DMA engine
4372 *
4373 * @rdev: radeon_device pointer
4374 * @src_offset: src GPU address
4375 * @dst_offset: dst GPU address
4376 * @num_gpu_pages: number of GPU pages to xfer
4377 * @fence: radeon fence object
4378 *
4379 * Copy GPU pages using the DMA engine (CIK).
4380 * Used by the radeon ttm implementation to move pages if
4381 * registered as the asic copy callback.
4382 */
4383 int cik_copy_dma(struct radeon_device *rdev,
4384 uint64_t src_offset, uint64_t dst_offset,
4385 unsigned num_gpu_pages,
4386 struct radeon_fence **fence)
4387 {
4388 struct radeon_semaphore *sem = NULL;
4389 int ring_index = rdev->asic->copy.dma_ring_index;
4390 struct radeon_ring *ring = &rdev->ring[ring_index];
4391 u32 size_in_bytes, cur_size_in_bytes;
4392 int i, num_loops;
4393 int r = 0;
4394
4395 r = radeon_semaphore_create(rdev, &sem);
4396 if (r) {
4397 DRM_ERROR("radeon: moving bo (%d).\n", r);
4398 return r;
4399 }
4400
4401 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4402 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4403 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4404 if (r) {
4405 DRM_ERROR("radeon: moving bo (%d).\n", r);
4406 radeon_semaphore_free(rdev, &sem, NULL);
4407 return r;
4408 }
4409
4410 if (radeon_fence_need_sync(*fence, ring->idx)) {
4411 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4412 ring->idx);
4413 radeon_fence_note_sync(*fence, ring->idx);
4414 } else {
4415 radeon_semaphore_free(rdev, &sem, NULL);
4416 }
4417
4418 for (i = 0; i < num_loops; i++) {
4419 cur_size_in_bytes = size_in_bytes;
4420 if (cur_size_in_bytes > 0x1fffff)
4421 cur_size_in_bytes = 0x1fffff;
4422 size_in_bytes -= cur_size_in_bytes;
4423 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4424 radeon_ring_write(ring, cur_size_in_bytes);
4425 radeon_ring_write(ring, 0); /* src/dst endian swap */
4426 radeon_ring_write(ring, src_offset & 0xffffffff);
4427 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4428 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4429 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4430 src_offset += cur_size_in_bytes;
4431 dst_offset += cur_size_in_bytes;
4432 }
4433
4434 r = radeon_fence_emit(rdev, fence, ring->idx);
4435 if (r) {
4436 radeon_ring_unlock_undo(rdev, ring);
4437 return r;
4438 }
4439
4440 radeon_ring_unlock_commit(rdev, ring);
4441 radeon_semaphore_free(rdev, &sem, *fence);
4442
4443 return r;
4444 }
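/*
 * Worked example (added for clarity): each COPY LINEAR packet moves at
 * most 0x1fffff bytes.  Copying 1024 GPU pages (4 MiB) thus takes
 * num_loops = DIV_ROUND_UP(4194304, 2097151) = 3 packets -- two full
 * 0x1fffff byte chunks plus a 2 byte remainder -- each 7 dwords,
 * which is why the ring is locked for num_loops * 7 + 14 dwords.
 */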
4445
4446 /**
4447 * cik_sdma_ring_test - simple async dma engine test
4448 *
4449 * @rdev: radeon_device pointer
4450 * @ring: radeon_ring structure holding ring information
4451 *
4452 * Test the DMA engine by using it to write a value
4453 * to memory (CIK).
4454 * Returns 0 for success, error for failure.
4455 */
4456 int cik_sdma_ring_test(struct radeon_device *rdev,
4457 struct radeon_ring *ring)
4458 {
4459 unsigned i;
4460 int r;
4461 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4462 u32 tmp;
4463
4464 if (!ptr) {
4465 DRM_ERROR("invalid vram scratch pointer\n");
4466 return -EINVAL;
4467 }
4468
4469 tmp = 0xCAFEDEAD;
4470 writel(tmp, ptr);
4471
4472 r = radeon_ring_lock(rdev, ring, 4);
4473 if (r) {
4474 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4475 return r;
4476 }
4477 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4478 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4479 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4480 radeon_ring_write(ring, 1); /* number of DWs to follow */
4481 radeon_ring_write(ring, 0xDEADBEEF);
4482 radeon_ring_unlock_commit(rdev, ring);
4483
4484 for (i = 0; i < rdev->usec_timeout; i++) {
4485 tmp = readl(ptr);
4486 if (tmp == 0xDEADBEEF)
4487 break;
4488 DRM_UDELAY(1);
4489 }
4490
4491 if (i < rdev->usec_timeout) {
4492 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4493 } else {
4494 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4495 ring->idx, tmp);
4496 r = -EINVAL;
4497 }
4498 return r;
4499 }
4500
4501 /**
4502 * cik_sdma_ib_test - test an IB on the DMA engine
4503 *
4504 * @rdev: radeon_device pointer
4505 * @ring: radeon_ring structure holding ring information
4506 *
4507 * Test a simple IB in the DMA ring (CIK).
4508 * Returns 0 on success, error on failure.
4509 */
4510 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4511 {
4512 struct radeon_ib ib;
4513 unsigned i;
4514 int r;
4515 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4516 u32 tmp = 0;
4517
4518 if (!ptr) {
4519 DRM_ERROR("invalid vram scratch pointer\n");
4520 return -EINVAL;
4521 }
4522
4523 tmp = 0xCAFEDEAD;
4524 writel(tmp, ptr);
4525
4526 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4527 if (r) {
4528 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4529 return r;
4530 }
4531
4532 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4533 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4534 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4535 ib.ptr[3] = 1;
4536 ib.ptr[4] = 0xDEADBEEF;
4537 ib.length_dw = 5;
4538
4539 r = radeon_ib_schedule(rdev, &ib, NULL);
4540 if (r) {
4541 radeon_ib_free(rdev, &ib);
4542 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4543 return r;
4544 }
4545 r = radeon_fence_wait(ib.fence, false);
4546 if (r) {
4547 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4548 return r;
4549 }
4550 for (i = 0; i < rdev->usec_timeout; i++) {
4551 tmp = readl(ptr);
4552 if (tmp == 0xDEADBEEF)
4553 break;
4554 DRM_UDELAY(1);
4555 }
4556 if (i < rdev->usec_timeout) {
4557 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4558 } else {
4559 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4560 r = -EINVAL;
4561 }
4562 radeon_ib_free(rdev, &ib);
4563 return r;
4564 }
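/*
 * Note (added for clarity): the five dword IB built above is a minimal
 * SDMA WRITE LINEAR packet -- header, destination address lo/hi, a
 * dword count of 1, and the 0xDEADBEEF payload -- the same packet the
 * ring test emits directly.  Success is detected by polling the CPU
 * mapping of the VRAM scratch page for the payload.
 */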
4565
4566
4567 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4568 {
4569 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4570 RREG32(GRBM_STATUS));
4571 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4572 RREG32(GRBM_STATUS2));
4573 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4574 RREG32(GRBM_STATUS_SE0));
4575 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4576 RREG32(GRBM_STATUS_SE1));
4577 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4578 RREG32(GRBM_STATUS_SE2));
4579 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4580 RREG32(GRBM_STATUS_SE3));
4581 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4582 RREG32(SRBM_STATUS));
4583 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4584 RREG32(SRBM_STATUS2));
4585 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4586 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4587 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4588 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4589 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4590 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4591 RREG32(CP_STALLED_STAT1));
4592 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4593 RREG32(CP_STALLED_STAT2));
4594 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4595 RREG32(CP_STALLED_STAT3));
4596 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4597 RREG32(CP_CPF_BUSY_STAT));
4598 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4599 RREG32(CP_CPF_STALLED_STAT1));
4600 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4601 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4602 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4603 RREG32(CP_CPC_STALLED_STAT1));
4604 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4605 }
4606
4607 /**
4608 * cik_gpu_check_soft_reset - check which blocks are busy
4609 *
4610 * @rdev: radeon_device pointer
4611 *
4612 * Check which blocks are busy and return the relevant reset
4613 * mask to be used by cik_gpu_soft_reset().
4614 * Returns a mask of the blocks to be reset.
4615 */
4616 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4617 {
4618 u32 reset_mask = 0;
4619 u32 tmp;
4620
4621 /* GRBM_STATUS */
4622 tmp = RREG32(GRBM_STATUS);
4623 if (tmp & (PA_BUSY | SC_BUSY |
4624 BCI_BUSY | SX_BUSY |
4625 TA_BUSY | VGT_BUSY |
4626 DB_BUSY | CB_BUSY |
4627 GDS_BUSY | SPI_BUSY |
4628 IA_BUSY | IA_BUSY_NO_DMA))
4629 reset_mask |= RADEON_RESET_GFX;
4630
4631 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4632 reset_mask |= RADEON_RESET_CP;
4633
4634 /* GRBM_STATUS2 */
4635 tmp = RREG32(GRBM_STATUS2);
4636 if (tmp & RLC_BUSY)
4637 reset_mask |= RADEON_RESET_RLC;
4638
4639 /* SDMA0_STATUS_REG */
4640 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4641 if (!(tmp & SDMA_IDLE))
4642 reset_mask |= RADEON_RESET_DMA;
4643
4644 /* SDMA1_STATUS_REG */
4645 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4646 if (!(tmp & SDMA_IDLE))
4647 reset_mask |= RADEON_RESET_DMA1;
4648
4649 /* SRBM_STATUS2 */
4650 tmp = RREG32(SRBM_STATUS2);
4651 if (tmp & SDMA_BUSY)
4652 reset_mask |= RADEON_RESET_DMA;
4653
4654 if (tmp & SDMA1_BUSY)
4655 reset_mask |= RADEON_RESET_DMA1;
4656
4657 /* SRBM_STATUS */
4658 tmp = RREG32(SRBM_STATUS);
4659
4660 if (tmp & IH_BUSY)
4661 reset_mask |= RADEON_RESET_IH;
4662
4663 if (tmp & SEM_BUSY)
4664 reset_mask |= RADEON_RESET_SEM;
4665
4666 if (tmp & GRBM_RQ_PENDING)
4667 reset_mask |= RADEON_RESET_GRBM;
4668
4669 if (tmp & VMC_BUSY)
4670 reset_mask |= RADEON_RESET_VMC;
4671
4672 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4673 MCC_BUSY | MCD_BUSY))
4674 reset_mask |= RADEON_RESET_MC;
4675
4676 if (evergreen_is_display_hung(rdev))
4677 reset_mask |= RADEON_RESET_DISPLAY;
4678
4679 /* Skip MC reset as it's most likely not hung, just busy */
4680 if (reset_mask & RADEON_RESET_MC) {
4681 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4682 reset_mask &= ~RADEON_RESET_MC;
4683 }
4684
4685 return reset_mask;
4686 }
4687
4688 /**
4689 * cik_gpu_soft_reset - soft reset GPU
4690 *
4691 * @rdev: radeon_device pointer
4692 * @reset_mask: mask of which blocks to reset
4693 *
4694 * Soft reset the blocks specified in @reset_mask.
4695 */
4696 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4697 {
4698 struct evergreen_mc_save save;
4699 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4700 u32 tmp;
4701
4702 if (reset_mask == 0)
4703 return;
4704
4705 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4706
4707 cik_print_gpu_status_regs(rdev);
4708 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4709 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4710 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4711 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4712
4713 /* stop the rlc */
4714 cik_rlc_stop(rdev);
4715
4716 /* Disable GFX parsing/prefetching */
4717 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4718
4719 /* Disable MEC parsing/prefetching */
4720 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4721
4722 if (reset_mask & RADEON_RESET_DMA) {
4723 /* sdma0 */
4724 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4725 tmp |= SDMA_HALT;
4726 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4727 }
4728 if (reset_mask & RADEON_RESET_DMA1) {
4729 /* sdma1 */
4730 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4731 tmp |= SDMA_HALT;
4732 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4733 }
4734
4735 evergreen_mc_stop(rdev, &save);
4736 if (evergreen_mc_wait_for_idle(rdev)) {
4737 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4738 }
4739
4740 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4741 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4742
4743 if (reset_mask & RADEON_RESET_CP) {
4744 grbm_soft_reset |= SOFT_RESET_CP;
4745
4746 srbm_soft_reset |= SOFT_RESET_GRBM;
4747 }
4748
4749 if (reset_mask & RADEON_RESET_DMA)
4750 srbm_soft_reset |= SOFT_RESET_SDMA;
4751
4752 if (reset_mask & RADEON_RESET_DMA1)
4753 srbm_soft_reset |= SOFT_RESET_SDMA1;
4754
4755 if (reset_mask & RADEON_RESET_DISPLAY)
4756 srbm_soft_reset |= SOFT_RESET_DC;
4757
4758 if (reset_mask & RADEON_RESET_RLC)
4759 grbm_soft_reset |= SOFT_RESET_RLC;
4760
4761 if (reset_mask & RADEON_RESET_SEM)
4762 srbm_soft_reset |= SOFT_RESET_SEM;
4763
4764 if (reset_mask & RADEON_RESET_IH)
4765 srbm_soft_reset |= SOFT_RESET_IH;
4766
4767 if (reset_mask & RADEON_RESET_GRBM)
4768 srbm_soft_reset |= SOFT_RESET_GRBM;
4769
4770 if (reset_mask & RADEON_RESET_VMC)
4771 srbm_soft_reset |= SOFT_RESET_VMC;
4772
4773 if (!(rdev->flags & RADEON_IS_IGP)) {
4774 if (reset_mask & RADEON_RESET_MC)
4775 srbm_soft_reset |= SOFT_RESET_MC;
4776 }
4777
4778 if (grbm_soft_reset) {
4779 tmp = RREG32(GRBM_SOFT_RESET);
4780 tmp |= grbm_soft_reset;
4781 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4782 WREG32(GRBM_SOFT_RESET, tmp);
4783 tmp = RREG32(GRBM_SOFT_RESET);
4784
4785 udelay(50);
4786
4787 tmp &= ~grbm_soft_reset;
4788 WREG32(GRBM_SOFT_RESET, tmp);
4789 tmp = RREG32(GRBM_SOFT_RESET);
4790 }
4791
4792 if (srbm_soft_reset) {
4793 tmp = RREG32(SRBM_SOFT_RESET);
4794 tmp |= srbm_soft_reset;
4795 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4796 WREG32(SRBM_SOFT_RESET, tmp);
4797 tmp = RREG32(SRBM_SOFT_RESET);
4798
4799 udelay(50);
4800
4801 tmp &= ~srbm_soft_reset;
4802 WREG32(SRBM_SOFT_RESET, tmp);
4803 tmp = RREG32(SRBM_SOFT_RESET);
4804 }
4805
4806 /* Wait a little for things to settle down */
4807 udelay(50);
4808
4809 evergreen_mc_resume(rdev, &save);
4810 udelay(50);
4811
4812 cik_print_gpu_status_regs(rdev);
4813 }
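/*
 * Note (added for clarity; inferred from the code above): both
 * GRBM_SOFT_RESET and SRBM_SOFT_RESET are pulsed -- written with the
 * reset bits set, read back to post the write, held for ~50 us, then
 * cleared and read back again.  evergreen_mc_stop()/evergreen_mc_resume()
 * bracket the pulse, presumably so outstanding display memory traffic
 * is quiesced while the blocks go through reset.
 */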
4814
4815 /**
4816 * cik_asic_reset - soft reset GPU
4817 *
4818 * @rdev: radeon_device pointer
4819 *
4820 * Look up which blocks are hung and attempt
4821 * to reset them.
4822 * Returns 0 for success.
4823 */
4824 int cik_asic_reset(struct radeon_device *rdev)
4825 {
4826 u32 reset_mask;
4827
4828 reset_mask = cik_gpu_check_soft_reset(rdev);
4829
4830 if (reset_mask)
4831 r600_set_bios_scratch_engine_hung(rdev, true);
4832
4833 cik_gpu_soft_reset(rdev, reset_mask);
4834
4835 reset_mask = cik_gpu_check_soft_reset(rdev);
4836
4837 if (!reset_mask)
4838 r600_set_bios_scratch_engine_hung(rdev, false);
4839
4840 return 0;
4841 }
4842
4843 /**
4844 * cik_gfx_is_lockup - check if the 3D engine is locked up
4845 *
4846 * @rdev: radeon_device pointer
4847 * @ring: radeon_ring structure holding ring information
4848 *
4849 * Check if the 3D engine is locked up (CIK).
4850 * Returns true if the engine is locked, false if not.
4851 */
4852 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4853 {
4854 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4855
4856 if (!(reset_mask & (RADEON_RESET_GFX |
4857 RADEON_RESET_COMPUTE |
4858 RADEON_RESET_CP))) {
4859 radeon_ring_lockup_update(ring);
4860 return false;
4861 }
4862 /* force CP activities */
4863 radeon_ring_force_activity(rdev, ring);
4864 return radeon_ring_test_lockup(rdev, ring);
4865 }
4866
4867 /**
4868 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4869 *
4870 * @rdev: radeon_device pointer
4871 * @ring: radeon_ring structure holding ring information
4872 *
4873 * Check if the async DMA engine is locked up (CIK).
4874 * Returns true if the engine appears to be locked up, false if not.
4875 */
4876 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4877 {
4878 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4879 u32 mask;
4880
4881 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4882 mask = RADEON_RESET_DMA;
4883 else
4884 mask = RADEON_RESET_DMA1;
4885
4886 if (!(reset_mask & mask)) {
4887 radeon_ring_lockup_update(ring);
4888 return false;
4889 }
4890 /* force ring activities */
4891 radeon_ring_force_activity(rdev, ring);
4892 return radeon_ring_test_lockup(rdev, ring);
4893 }
4894
4895 /* MC */
4896 /**
4897 * cik_mc_program - program the GPU memory controller
4898 *
4899 * @rdev: radeon_device pointer
4900 *
4901 * Set the location of vram, gart, and AGP in the GPU's
4902 * physical address space (CIK).
4903 */
4904 static void cik_mc_program(struct radeon_device *rdev)
4905 {
4906 struct evergreen_mc_save save;
4907 u32 tmp;
4908 int i, j;
4909
4910 /* Initialize HDP */
4911 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4912 WREG32((0x2c14 + j), 0x00000000);
4913 WREG32((0x2c18 + j), 0x00000000);
4914 WREG32((0x2c1c + j), 0x00000000);
4915 WREG32((0x2c20 + j), 0x00000000);
4916 WREG32((0x2c24 + j), 0x00000000);
4917 }
4918 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4919
4920 evergreen_mc_stop(rdev, &save);
4921 if (radeon_mc_wait_for_idle(rdev)) {
4922 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4923 }
4924 /* Lock out access through the VGA aperture */
4925 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4926 /* Update configuration */
4927 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4928 rdev->mc.vram_start >> 12);
4929 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4930 rdev->mc.vram_end >> 12);
4931 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4932 rdev->vram_scratch.gpu_addr >> 12);
4933 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4934 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4935 WREG32(MC_VM_FB_LOCATION, tmp);
4936 /* XXX double check these! */
4937 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4938 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4939 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4940 WREG32(MC_VM_AGP_BASE, 0);
4941 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4942 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4943 if (radeon_mc_wait_for_idle(rdev)) {
4944 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4945 }
4946 evergreen_mc_resume(rdev, &save);
4947 /* we need to own VRAM, so turn off the VGA renderer here
4948 * to stop it overwriting our objects */
4949 rv515_vga_render_disable(rdev);
4950 }
4951
4952 /**
4953 * cik_mc_init - initialize the memory controller driver params
4954 *
4955 * @rdev: radeon_device pointer
4956 *
4957 * Look up the amount of vram, vram width, and decide how to place
4958 * vram and gart within the GPU's physical address space (CIK).
4959 * Returns 0 for success.
4960 */
4961 static int cik_mc_init(struct radeon_device *rdev)
4962 {
4963 u32 tmp;
4964 int chansize, numchan;
4965
4966 /* Get VRAM information */
4967 rdev->mc.vram_is_ddr = true;
4968 tmp = RREG32(MC_ARB_RAMCFG);
4969 if (tmp & CHANSIZE_MASK) {
4970 chansize = 64;
4971 } else {
4972 chansize = 32;
4973 }
4974 tmp = RREG32(MC_SHARED_CHMAP);
4975 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4976 case 0:
4977 default:
4978 numchan = 1;
4979 break;
4980 case 1:
4981 numchan = 2;
4982 break;
4983 case 2:
4984 numchan = 4;
4985 break;
4986 case 3:
4987 numchan = 8;
4988 break;
4989 case 4:
4990 numchan = 3;
4991 break;
4992 case 5:
4993 numchan = 6;
4994 break;
4995 case 6:
4996 numchan = 10;
4997 break;
4998 case 7:
4999 numchan = 12;
5000 break;
5001 case 8:
5002 numchan = 16;
5003 break;
5004 }
5005 rdev->mc.vram_width = numchan * chansize;
5006 /* Could the aperture size report 0? */
5007 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5008 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5009 /* size in MB */
5010 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5011 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5012 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5013 si_vram_gtt_location(rdev, &rdev->mc);
5014 radeon_update_bandwidth_info(rdev);
5015
5016 return 0;
5017 }
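/*
 * Worked example (added for clarity, hypothetical values): a board
 * reporting NOOFCHAN = 2 (4 channels) with a 64 bit channel size ends
 * up with rdev->mc.vram_width = 4 * 64 = 256 bits.  The odd-looking
 * cases (3, 6, 10, 12 channels) cover asymmetric memory
 * configurations.
 */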
5018
5019 /*
5020 * GART
5021 * VMID 0 is the physical GPU addresses as used by the kernel.
5022 * VMIDs 1-15 are used for userspace clients and are handled
5023 * by the radeon vm/hsa code.
5024 */
5025 /**
5026 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5027 *
5028 * @rdev: radeon_device pointer
5029 *
5030 * Flush the TLB for the VMID 0 page table (CIK).
5031 */
5032 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5033 {
5034 /* flush hdp cache */
5035 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5036
5037 /* bits 0-15 are the VM contexts0-15 */
5038 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5039 }
5040
5041 /**
5042 * cik_pcie_gart_enable - gart enable
5043 *
5044 * @rdev: radeon_device pointer
5045 *
5046 * This sets up the TLBs, programs the page tables for VMID0,
5047 * sets up the hw for VMIDs 1-15 which are allocated on
5048 * demand, and sets up the global locations for the LDS, GDS,
5049 * and GPUVM for FSA64 clients (CIK).
5050 * Returns 0 for success, errors for failure.
5051 */
5052 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5053 {
5054 int r, i;
5055
5056 if (rdev->gart.robj == NULL) {
5057 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5058 return -EINVAL;
5059 }
5060 r = radeon_gart_table_vram_pin(rdev);
5061 if (r)
5062 return r;
5063 radeon_gart_restore(rdev);
5064 /* Setup TLB control */
5065 WREG32(MC_VM_MX_L1_TLB_CNTL,
5066 (0xA << 7) |
5067 ENABLE_L1_TLB |
5068 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5069 ENABLE_ADVANCED_DRIVER_MODEL |
5070 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5071 /* Setup L2 cache */
5072 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5073 ENABLE_L2_FRAGMENT_PROCESSING |
5074 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5075 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5076 EFFECTIVE_L2_QUEUE_SIZE(7) |
5077 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5078 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5079 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5080 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5081 /* setup context0 */
5082 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5083 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5084 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5085 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5086 (u32)(rdev->dummy_page.addr >> 12));
5087 WREG32(VM_CONTEXT0_CNTL2, 0);
5088 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5089 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5090
5091 WREG32(0x15D4, 0);
5092 WREG32(0x15D8, 0);
5093 WREG32(0x15DC, 0);
5094
5095 /* empty context1-15 */
5096 /* FIXME: start with 4G; once we're using 2 level page tables,
5097 * switch to the full vm size space
5098 */
5099 /* set vm size, must be a multiple of 4 */
5100 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5101 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5102 for (i = 1; i < 16; i++) {
5103 if (i < 8)
5104 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5105 rdev->gart.table_addr >> 12);
5106 else
5107 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5108 rdev->gart.table_addr >> 12);
5109 }
5110
5111 /* enable context1-15 */
5112 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5113 (u32)(rdev->dummy_page.addr >> 12));
5114 WREG32(VM_CONTEXT1_CNTL2, 4);
5115 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5116 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5117 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5118 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5119 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5120 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5121 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5122 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5123 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5124 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5125 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5126 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5127 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5128
5129 /* TC cache setup ??? */
5130 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5131 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5132 WREG32(TC_CFG_L1_STORE_POLICY, 0);
5133
5134 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5135 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5136 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5137 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5138 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5139
5140 WREG32(TC_CFG_L1_VOLATILE, 0);
5141 WREG32(TC_CFG_L2_VOLATILE, 0);
5142
5143 if (rdev->family == CHIP_KAVERI) {
5144 u32 tmp = RREG32(CHUB_CONTROL);
5145 tmp &= ~BYPASS_VM;
5146 WREG32(CHUB_CONTROL, tmp);
5147 }
5148
5149 /* XXX SH_MEM regs */
5150 /* where to put LDS, scratch, GPUVM in FSA64 space */
5151 mutex_lock(&rdev->srbm_mutex);
5152 for (i = 0; i < 16; i++) {
5153 cik_srbm_select(rdev, 0, 0, 0, i);
5154 /* CP and shaders */
5155 WREG32(SH_MEM_CONFIG, 0);
5156 WREG32(SH_MEM_APE1_BASE, 1);
5157 WREG32(SH_MEM_APE1_LIMIT, 0);
5158 WREG32(SH_MEM_BASES, 0);
5159 /* SDMA GFX */
5160 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5161 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5162 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5163 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5164 /* XXX SDMA RLC - todo */
5165 }
5166 cik_srbm_select(rdev, 0, 0, 0, 0);
5167 mutex_unlock(&rdev->srbm_mutex);
5168
5169 cik_pcie_gart_tlb_flush(rdev);
5170 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5171 (unsigned)(rdev->mc.gtt_size >> 20),
5172 (unsigned long long)rdev->gart.table_addr);
5173 rdev->gart.ready = true;
5174 return 0;
5175 }
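/*
 * Note (added for clarity; inferred from the code above): context 0
 * (the kernel GART) uses a flat, depth 0 page table spanning
 * gtt_start..gtt_end, while contexts 1-15 get PAGE_TABLE_DEPTH(1) two
 * level tables currently capped at max_pfn (see the FIXME above).
 * Faults on the user contexts are redirected to the dummy page and
 * raise interrupts.
 */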
5176
5177 /**
5178 * cik_pcie_gart_disable - gart disable
5179 *
5180 * @rdev: radeon_device pointer
5181 *
5182 * This disables all VM page tables (CIK).
5183 */
5184 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5185 {
5186 /* Disable all tables */
5187 WREG32(VM_CONTEXT0_CNTL, 0);
5188 WREG32(VM_CONTEXT1_CNTL, 0);
5189 /* Setup TLB control */
5190 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5191 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5192 /* Setup L2 cache */
5193 WREG32(VM_L2_CNTL,
5194 ENABLE_L2_FRAGMENT_PROCESSING |
5195 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5196 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5197 EFFECTIVE_L2_QUEUE_SIZE(7) |
5198 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5199 WREG32(VM_L2_CNTL2, 0);
5200 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5201 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5202 radeon_gart_table_vram_unpin(rdev);
5203 }
5204
5205 /**
5206 * cik_pcie_gart_fini - vm fini callback
5207 *
5208 * @rdev: radeon_device pointer
5209 *
5210 * Tears down the driver GART/VM setup (CIK).
5211 */
5212 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5213 {
5214 cik_pcie_gart_disable(rdev);
5215 radeon_gart_table_vram_free(rdev);
5216 radeon_gart_fini(rdev);
5217 }
5218
5219 /* vm parser */
5220 /**
5221 * cik_ib_parse - vm ib_parse callback
5222 *
5223 * @rdev: radeon_device pointer
5224 * @ib: indirect buffer pointer
5225 *
5226 * CIK uses hw IB checking so this is a nop (CIK).
5227 */
5228 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5229 {
5230 return 0;
5231 }
5232
5233 /*
5234 * vm
5235 * VMID 0 is the physical GPU addresses as used by the kernel.
5236 * VMIDs 1-15 are used for userspace clients and are handled
5237 * by the radeon vm/hsa code.
5238 */
5239 /**
5240 * cik_vm_init - cik vm init callback
5241 *
5242 * @rdev: radeon_device pointer
5243 *
5244 * Inits cik specific vm parameters (number of VMs, base of vram for
5245 * VMIDs 1-15) (CIK).
5246 * Returns 0 for success.
5247 */
5248 int cik_vm_init(struct radeon_device *rdev)
5249 {
5250 /* number of VMs */
5251 rdev->vm_manager.nvm = 16;
5252 /* base offset of vram pages */
5253 if (rdev->flags & RADEON_IS_IGP) {
5254 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5255 tmp <<= 22;
5256 rdev->vm_manager.vram_base_offset = tmp;
5257 } else
5258 rdev->vm_manager.vram_base_offset = 0;
5259
5260 return 0;
5261 }
5262
5263 /**
5264 * cik_vm_fini - cik vm fini callback
5265 *
5266 * @rdev: radeon_device pointer
5267 *
5268 * Tear down any asic specific VM setup (CIK).
5269 */
5270 void cik_vm_fini(struct radeon_device *rdev)
5271 {
5272 }
5273
5274 /**
5275 * cik_vm_decode_fault - print human readable fault info
5276 *
5277 * @rdev: radeon_device pointer
5278 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5279 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
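* @mc_client: memory client value (interpreted as the name of the faulting block)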
5280 *
5281 * Print human readable fault information (CIK).
5282 */
5283 static void cik_vm_decode_fault(struct radeon_device *rdev,
5284 u32 status, u32 addr, u32 mc_client)
5285 {
5286 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5287 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5288 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5289 char *block = (char *)&mc_client;
5290
5291 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5292 protections, vmid, addr,
5293 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5294 block, mc_id);
5295 }
5296
5297 /**
5298 * cik_vm_flush - cik vm flush using the CP
5299 *
5300 * @rdev: radeon_device pointer
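* @ridx: radeon ring index
* @vm: radeon_vm pointer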
5301 *
5302 * Update the page table base and flush the VM TLB
5303 * using the CP (CIK).
5304 */
5305 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5306 {
5307 struct radeon_ring *ring = &rdev->ring[ridx];
5308
5309 if (vm == NULL)
5310 return;
5311
5312 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5313 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5314 WRITE_DATA_DST_SEL(0)));
5315 if (vm->id < 8) {
5316 radeon_ring_write(ring,
5317 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5318 } else {
5319 radeon_ring_write(ring,
5320 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5321 }
5322 radeon_ring_write(ring, 0);
5323 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5324
5325 /* update SH_MEM_* regs */
5326 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5327 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5328 WRITE_DATA_DST_SEL(0)));
5329 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5330 radeon_ring_write(ring, 0);
5331 radeon_ring_write(ring, VMID(vm->id));
5332
5333 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5334 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5335 WRITE_DATA_DST_SEL(0)));
5336 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5337 radeon_ring_write(ring, 0);
5338
5339 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5340 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5341 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5342 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5343
5344 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5345 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5346 WRITE_DATA_DST_SEL(0)));
5347 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5348 radeon_ring_write(ring, 0);
5349 radeon_ring_write(ring, VMID(0));
5350
5351 /* HDP flush */
5352 /* We should be using the WAIT_REG_MEM packet here like in
5353 * cik_fence_ring_emit(), but it causes the CP to hang in this
5354 * context...
5355 */
5356 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5357 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5358 WRITE_DATA_DST_SEL(0)));
5359 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5360 radeon_ring_write(ring, 0);
5361 radeon_ring_write(ring, 0);
5362
5363 /* bits 0-15 are the VM contexts0-15 */
5364 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5365 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5366 WRITE_DATA_DST_SEL(0)));
5367 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5368 radeon_ring_write(ring, 0);
5369 radeon_ring_write(ring, 1 << vm->id);
5370
5371 /* compute doesn't have PFP */
5372 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5373 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5374 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5375 radeon_ring_write(ring, 0x0);
5376 }
5377 }
5378
5379 /**
5380 * cik_vm_set_page - update the page tables using sDMA
5381 *
5382 * @rdev: radeon_device pointer
5383 * @ib: indirect buffer to fill with commands
5384 * @pe: addr of the page entry
5385 * @addr: dst addr to write into pe
5386 * @count: number of page entries to update
5387 * @incr: increase next addr by incr bytes
5388 * @flags: access flags
5389 *
5390 * Update the page tables using CP or sDMA (CIK).
5391 */
5392 void cik_vm_set_page(struct radeon_device *rdev,
5393 struct radeon_ib *ib,
5394 uint64_t pe,
5395 uint64_t addr, unsigned count,
5396 uint32_t incr, uint32_t flags)
5397 {
5398 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
5399 uint64_t value;
5400 unsigned ndw;
5401
5402 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
5403 /* CP */
5404 while (count) {
5405 ndw = 2 + count * 2;
5406 if (ndw > 0x3FFE)
5407 ndw = 0x3FFE;
5408
5409 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
5410 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
5411 WRITE_DATA_DST_SEL(1));
5412 ib->ptr[ib->length_dw++] = pe;
5413 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5414 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
5415 if (flags & RADEON_VM_PAGE_SYSTEM) {
5416 value = radeon_vm_map_gart(rdev, addr);
5417 value &= 0xFFFFFFFFFFFFF000ULL;
5418 } else if (flags & RADEON_VM_PAGE_VALID) {
5419 value = addr;
5420 } else {
5421 value = 0;
5422 }
5423 addr += incr;
5424 value |= r600_flags;
5425 ib->ptr[ib->length_dw++] = value;
5426 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5427 }
5428 }
5429 } else {
5430 /* DMA */
5431 if (flags & RADEON_VM_PAGE_SYSTEM) {
5432 while (count) {
5433 ndw = count * 2;
5434 if (ndw > 0xFFFFE)
5435 ndw = 0xFFFFE;
5436
5437 /* for non-physically contiguous pages (system) */
5438 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
5439 ib->ptr[ib->length_dw++] = pe;
5440 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5441 ib->ptr[ib->length_dw++] = ndw;
5442 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
5443 if (flags & RADEON_VM_PAGE_SYSTEM) {
5444 value = radeon_vm_map_gart(rdev, addr);
5445 value &= 0xFFFFFFFFFFFFF000ULL;
5446 } else if (flags & RADEON_VM_PAGE_VALID) {
5447 value = addr;
5448 } else {
5449 value = 0;
5450 }
5451 addr += incr;
5452 value |= r600_flags;
5453 ib->ptr[ib->length_dw++] = value;
5454 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5455 }
5456 }
5457 } else {
5458 while (count) {
5459 ndw = count;
5460 if (ndw > 0x7FFFF)
5461 ndw = 0x7FFFF;
5462
5463 if (flags & RADEON_VM_PAGE_VALID)
5464 value = addr;
5465 else
5466 value = 0;
5467 /* for physically contiguous pages (vram) */
5468 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
5469 ib->ptr[ib->length_dw++] = pe; /* dst addr */
5470 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5471 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
5472 ib->ptr[ib->length_dw++] = 0;
5473 ib->ptr[ib->length_dw++] = value; /* value */
5474 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5475 ib->ptr[ib->length_dw++] = incr; /* increment size */
5476 ib->ptr[ib->length_dw++] = 0;
5477 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
5478 pe += ndw * 8;
5479 addr += ndw * incr;
5480 count -= ndw;
5481 }
5482 }
5483 while (ib->length_dw & 0x7)
5484 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
5485 }
5486 }
5487
5488 /**
5489 * cik_dma_vm_flush - cik vm flush using sDMA
5490 *
5491 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
5492 *
5493 * Update the page table base and flush the VM TLB
5494 * using sDMA (CIK).
5495 */
5496 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5497 {
5498 struct radeon_ring *ring = &rdev->ring[ridx];
5499 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5500 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5501 u32 ref_and_mask;
5502
5503 if (vm == NULL)
5504 return;
5505
5506 if (ridx == R600_RING_TYPE_DMA_INDEX)
5507 ref_and_mask = SDMA0;
5508 else
5509 ref_and_mask = SDMA1;
5510
5511 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5512 if (vm->id < 8) {
5513 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5514 } else {
5515 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5516 }
5517 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5518
5519 /* update SH_MEM_* regs */
5520 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5521 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5522 radeon_ring_write(ring, VMID(vm->id));
5523
5524 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5525 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5526 radeon_ring_write(ring, 0);
5527
5528 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5529 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
5530 radeon_ring_write(ring, 0);
5531
5532 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5533 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
5534 radeon_ring_write(ring, 1);
5535
5536 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5537 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
5538 radeon_ring_write(ring, 0);
5539
5540 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5541 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5542 radeon_ring_write(ring, VMID(0));
5543
5544 /* flush HDP */
5545 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
5546 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
5547 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
5548 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
5549 radeon_ring_write(ring, ref_and_mask); /* MASK */
5550 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5551
5552 /* flush TLB */
5553 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5554 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5555 radeon_ring_write(ring, 1 << vm->id);
5556 }
5557
5558 /*
5559 * RLC
5560 * The RLC is a multi-purpose microengine that handles a
5561 * variety of functions, the most important of which is
5562 * the interrupt controller.
5563 */
5564 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5565 bool enable)
5566 {
5567 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5568
5569 if (enable)
5570 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5571 else
5572 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5573 WREG32(CP_INT_CNTL_RING0, tmp);
5574 }
5575
5576 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5577 {
5578 u32 tmp;
5579
5580 tmp = RREG32(RLC_LB_CNTL);
5581 if (enable)
5582 tmp |= LOAD_BALANCE_ENABLE;
5583 else
5584 tmp &= ~LOAD_BALANCE_ENABLE;
5585 WREG32(RLC_LB_CNTL, tmp);
5586 }
5587
5588 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5589 {
5590 u32 i, j, k;
5591 u32 mask;
5592
5593 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5594 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5595 cik_select_se_sh(rdev, i, j);
5596 for (k = 0; k < rdev->usec_timeout; k++) {
5597 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5598 break;
5599 udelay(1);
5600 }
5601 }
5602 }
5603 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5604
5605 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5606 for (k = 0; k < rdev->usec_timeout; k++) {
5607 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5608 break;
5609 udelay(1);
5610 }
5611 }
5612
5613 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5614 {
5615 u32 tmp;
5616
5617 tmp = RREG32(RLC_CNTL);
5618 if (tmp != rlc)
5619 WREG32(RLC_CNTL, rlc);
5620 }
5621
5622 static u32 cik_halt_rlc(struct radeon_device *rdev)
5623 {
5624 u32 data, orig;
5625
5626 orig = data = RREG32(RLC_CNTL);
5627
5628 if (data & RLC_ENABLE) {
5629 u32 i;
5630
5631 data &= ~RLC_ENABLE;
5632 WREG32(RLC_CNTL, data);
5633
5634 for (i = 0; i < rdev->usec_timeout; i++) {
5635 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5636 break;
5637 udelay(1);
5638 }
5639
5640 cik_wait_for_rlc_serdes(rdev);
5641 }
5642
5643 return orig;
5644 }
5645
5646 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5647 {
5648 u32 tmp, i, mask;
5649
5650 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5651 WREG32(RLC_GPR_REG2, tmp);
5652
5653 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5654 for (i = 0; i < rdev->usec_timeout; i++) {
5655 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5656 break;
5657 udelay(1);
5658 }
5659
5660 for (i = 0; i < rdev->usec_timeout; i++) {
5661 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5662 break;
5663 udelay(1);
5664 }
5665 }
5666
5667 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5668 {
5669 u32 tmp;
5670
5671 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5672 WREG32(RLC_GPR_REG2, tmp);
5673 }
5674
5675 /**
5676 * cik_rlc_stop - stop the RLC ME
5677 *
5678 * @rdev: radeon_device pointer
5679 *
5680 * Halt the RLC ME (MicroEngine) (CIK).
5681 */
5682 static void cik_rlc_stop(struct radeon_device *rdev)
5683 {
5684 WREG32(RLC_CNTL, 0);
5685
5686 cik_enable_gui_idle_interrupt(rdev, false);
5687
5688 cik_wait_for_rlc_serdes(rdev);
5689 }
5690
5691 /**
5692 * cik_rlc_start - start the RLC ME
5693 *
5694 * @rdev: radeon_device pointer
5695 *
5696 * Unhalt the RLC ME (MicroEngine) (CIK).
5697 */
5698 static void cik_rlc_start(struct radeon_device *rdev)
5699 {
5700 WREG32(RLC_CNTL, RLC_ENABLE);
5701
5702 cik_enable_gui_idle_interrupt(rdev, true);
5703
5704 udelay(50);
5705 }
5706
5707 /**
5708 * cik_rlc_resume - setup the RLC hw
5709 *
5710 * @rdev: radeon_device pointer
5711 *
5712 * Initialize the RLC registers, load the ucode,
5713 * and start the RLC (CIK).
5714 * Returns 0 for success, -EINVAL if the ucode is not available.
5715 */
5716 static int cik_rlc_resume(struct radeon_device *rdev)
5717 {
5718 u32 i, size, tmp;
5719 const __be32 *fw_data;
5720
5721 if (!rdev->rlc_fw)
5722 return -EINVAL;
5723
5724 switch (rdev->family) {
5725 case CHIP_BONAIRE:
5726 default:
5727 size = BONAIRE_RLC_UCODE_SIZE;
5728 break;
5729 case CHIP_KAVERI:
5730 size = KV_RLC_UCODE_SIZE;
5731 break;
5732 case CHIP_KABINI:
5733 size = KB_RLC_UCODE_SIZE;
5734 break;
5735 }
5736
5737 cik_rlc_stop(rdev);
5738
5739 /* disable CG */
5740 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5741 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5742
5743 si_rlc_reset(rdev);
5744
5745 cik_init_pg(rdev);
5746
5747 cik_init_cg(rdev);
5748
5749 WREG32(RLC_LB_CNTR_INIT, 0);
5750 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5751
5752 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5753 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5754 WREG32(RLC_LB_PARAMS, 0x00600408);
5755 WREG32(RLC_LB_CNTL, 0x80000004);
5756
5757 WREG32(RLC_MC_CNTL, 0);
5758 WREG32(RLC_UCODE_CNTL, 0);
5759
5760 fw_data = (const __be32 *)rdev->rlc_fw->data;
5761 WREG32(RLC_GPM_UCODE_ADDR, 0);
5762 for (i = 0; i < size; i++)
5763 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5764 WREG32(RLC_GPM_UCODE_ADDR, 0);
5765
5766 /* XXX - find out what chips support lbpw */
5767 cik_enable_lbpw(rdev, false);
5768
5769 if (rdev->family == CHIP_BONAIRE)
5770 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5771
5772 cik_rlc_start(rdev);
5773
5774 return 0;
5775 }
5776
5777 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5778 {
5779 u32 data, orig, tmp, tmp2;
5780
5781 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5782
5783 cik_enable_gui_idle_interrupt(rdev, enable);
5784
5785 if (enable) {
5786 tmp = cik_halt_rlc(rdev);
5787
5788 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5789 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5790 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5791 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5792 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5793
5794 cik_update_rlc(rdev, tmp);
5795
5796 data |= CGCG_EN | CGLS_EN;
5797 } else {
5798 RREG32(CB_CGTT_SCLK_CTRL);
5799 RREG32(CB_CGTT_SCLK_CTRL);
5800 RREG32(CB_CGTT_SCLK_CTRL);
5801 RREG32(CB_CGTT_SCLK_CTRL);
5802
5803 data &= ~(CGCG_EN | CGLS_EN);
5804 }
5805
5806 if (orig != data)
5807 WREG32(RLC_CGCG_CGLS_CTRL, data);
5809 }
5810
5811 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5812 {
5813 u32 data, orig, tmp = 0;
5814
5815 if (enable) {
5816 orig = data = RREG32(CP_MEM_SLP_CNTL);
5817 data |= CP_MEM_LS_EN;
5818 if (orig != data)
5819 WREG32(CP_MEM_SLP_CNTL, data);
5820
5821 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5822 data &= 0xfffffffd;
5823 if (orig != data)
5824 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5825
5826 tmp = cik_halt_rlc(rdev);
5827
5828 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5829 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5830 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5831 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5832 WREG32(RLC_SERDES_WR_CTRL, data);
5833
5834 cik_update_rlc(rdev, tmp);
5835
5836 orig = data = RREG32(CGTS_SM_CTRL_REG);
5837 data &= ~SM_MODE_MASK;
5838 data |= SM_MODE(0x2);
5839 data |= SM_MODE_ENABLE;
5840 data &= ~CGTS_OVERRIDE;
5841 data &= ~CGTS_LS_OVERRIDE;
5842 data &= ~ON_MONITOR_ADD_MASK;
5843 data |= ON_MONITOR_ADD_EN;
5844 data |= ON_MONITOR_ADD(0x96);
5845 if (orig != data)
5846 WREG32(CGTS_SM_CTRL_REG, data);
5847 } else {
5848 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5849 data |= 0x00000002;
5850 if (orig != data)
5851 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5852
5853 data = RREG32(RLC_MEM_SLP_CNTL);
5854 if (data & RLC_MEM_LS_EN) {
5855 data &= ~RLC_MEM_LS_EN;
5856 WREG32(RLC_MEM_SLP_CNTL, data);
5857 }
5858
5859 data = RREG32(CP_MEM_SLP_CNTL);
5860 if (data & CP_MEM_LS_EN) {
5861 data &= ~CP_MEM_LS_EN;
5862 WREG32(CP_MEM_SLP_CNTL, data);
5863 }
5864
5865 orig = data = RREG32(CGTS_SM_CTRL_REG);
5866 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5867 if (orig != data)
5868 WREG32(CGTS_SM_CTRL_REG, data);
5869
5870 tmp = cik_halt_rlc(rdev);
5871
5872 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5873 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5874 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5875 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5876 WREG32(RLC_SERDES_WR_CTRL, data);
5877
5878 cik_update_rlc(rdev, tmp);
5879 }
5880 }
5881
5882 static const u32 mc_cg_registers[] =
5883 {
5884 MC_HUB_MISC_HUB_CG,
5885 MC_HUB_MISC_SIP_CG,
5886 MC_HUB_MISC_VM_CG,
5887 MC_XPB_CLK_GAT,
5888 ATC_MISC_CG,
5889 MC_CITF_MISC_WR_CG,
5890 MC_CITF_MISC_RD_CG,
5891 MC_CITF_MISC_VM_CG,
5892 VM_L2_CG,
5893 };
5894
5895 static void cik_enable_mc_ls(struct radeon_device *rdev,
5896 bool enable)
5897 {
5898 int i;
5899 u32 orig, data;
5900
5901 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5902 orig = data = RREG32(mc_cg_registers[i]);
5903 if (enable)
5904 data |= MC_LS_ENABLE;
5905 else
5906 data &= ~MC_LS_ENABLE;
5907 if (data != orig)
5908 WREG32(mc_cg_registers[i], data);
5909 }
5910 }
5911
5912 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5913 bool enable)
5914 {
5915 int i;
5916 u32 orig, data;
5917
5918 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5919 orig = data = RREG32(mc_cg_registers[i]);
5920 if (enable)
5921 data |= MC_CG_ENABLE;
5922 else
5923 data &= ~MC_CG_ENABLE;
5924 if (data != orig)
5925 WREG32(mc_cg_registers[i], data);
5926 }
5927 }
5928
5929 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5930 bool enable)
5931 {
5932 u32 orig, data;
5933
5934 if (enable) {
5935 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5936 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5937 } else {
5938 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5939 data |= 0xff000000;
5940 if (data != orig)
5941 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5942
5943 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5944 data |= 0xff000000;
5945 if (data != orig)
5946 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5947 }
5948 }
5949
5950 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5951 bool enable)
5952 {
5953 u32 orig, data;
5954
5955 if (enable) {
5956 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5957 data |= 0x100;
5958 if (orig != data)
5959 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5960
5961 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5962 data |= 0x100;
5963 if (orig != data)
5964 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5965 } else {
5966 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5967 data &= ~0x100;
5968 if (orig != data)
5969 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5970
5971 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5972 data &= ~0x100;
5973 if (orig != data)
5974 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5975 }
5976 }
5977
5978 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5979 bool enable)
5980 {
5981 u32 orig, data;
5982
5983 if (enable) {
5984 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5985 data = 0xfff;
5986 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5987
5988 orig = data = RREG32(UVD_CGC_CTRL);
5989 data |= DCM;
5990 if (orig != data)
5991 WREG32(UVD_CGC_CTRL, data);
5992 } else {
5993 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5994 data &= ~0xfff;
5995 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5996
5997 orig = data = RREG32(UVD_CGC_CTRL);
5998 data &= ~DCM;
5999 if (orig != data)
6000 WREG32(UVD_CGC_CTRL, data);
6001 }
6002 }
6003
6004 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6005 bool enable)
6006 {
6007 u32 orig, data;
6008
6009 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6010
6011 if (enable)
6012 data &= ~CLOCK_GATING_DIS;
6013 else
6014 data |= CLOCK_GATING_DIS;
6015
6016 if (orig != data)
6017 WREG32(HDP_HOST_PATH_CNTL, data);
6018 }
6019
6020 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6021 bool enable)
6022 {
6023 u32 orig, data;
6024
6025 orig = data = RREG32(HDP_MEM_POWER_LS);
6026
6027 if (enable)
6028 data |= HDP_LS_ENABLE;
6029 else
6030 data &= ~HDP_LS_ENABLE;
6031
6032 if (orig != data)
6033 WREG32(HDP_MEM_POWER_LS, data);
6034 }
6035
6036 void cik_update_cg(struct radeon_device *rdev,
6037 u32 block, bool enable)
6038 {
6039 if (block & RADEON_CG_BLOCK_GFX) {
6040 /* order matters! */
6041 if (enable) {
6042 cik_enable_mgcg(rdev, true);
6043 cik_enable_cgcg(rdev, true);
6044 } else {
6045 cik_enable_cgcg(rdev, false);
6046 cik_enable_mgcg(rdev, false);
6047 }
6048 }
6049
6050 if (block & RADEON_CG_BLOCK_MC) {
6051 if (!(rdev->flags & RADEON_IS_IGP)) {
6052 cik_enable_mc_mgcg(rdev, enable);
6053 cik_enable_mc_ls(rdev, enable);
6054 }
6055 }
6056
6057 if (block & RADEON_CG_BLOCK_SDMA) {
6058 cik_enable_sdma_mgcg(rdev, enable);
6059 cik_enable_sdma_mgls(rdev, enable);
6060 }
6061
6062 if (block & RADEON_CG_BLOCK_UVD) {
6063 if (rdev->has_uvd)
6064 cik_enable_uvd_mgcg(rdev, enable);
6065 }
6066
6067 if (block & RADEON_CG_BLOCK_HDP) {
6068 cik_enable_hdp_mgcg(rdev, enable);
6069 cik_enable_hdp_ls(rdev, enable);
6070 }
6071 }
6072
6073 static void cik_init_cg(struct radeon_device *rdev)
6074 {
6076 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
6077
6078 if (rdev->has_uvd)
6079 si_init_uvd_internal_cg(rdev);
6080
6081 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6082 RADEON_CG_BLOCK_SDMA |
6083 RADEON_CG_BLOCK_UVD |
6084 RADEON_CG_BLOCK_HDP), true);
6085 }
6086
6087 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6088 bool enable)
6089 {
6090 u32 data, orig;
6091
6092 orig = data = RREG32(RLC_PG_CNTL);
6093 if (enable)
6094 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6095 else
6096 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6097 if (orig != data)
6098 WREG32(RLC_PG_CNTL, data);
6099 }
6100
6101 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6102 bool enable)
6103 {
6104 u32 data, orig;
6105
6106 orig = data = RREG32(RLC_PG_CNTL);
6107 if (enable)
6108 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6109 else
6110 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6111 if (orig != data)
6112 WREG32(RLC_PG_CNTL, data);
6113 }
6114
6115 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6116 {
6117 u32 data, orig;
6118
6119 orig = data = RREG32(RLC_PG_CNTL);
6120 if (enable)
6121 data &= ~DISABLE_CP_PG;
6122 else
6123 data |= DISABLE_CP_PG;
6124 if (orig != data)
6125 WREG32(RLC_PG_CNTL, data);
6126 }
6127
6128 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6129 {
6130 u32 data, orig;
6131
6132 orig = data = RREG32(RLC_PG_CNTL);
6133 if (enable)
6134 data &= ~DISABLE_GDS_PG;
6135 else
6136 data |= DISABLE_GDS_PG;
6137 if (orig != data)
6138 WREG32(RLC_PG_CNTL, data);
6139 }
6140
6141 #define CP_ME_TABLE_SIZE 96
6142 #define CP_ME_TABLE_OFFSET 2048
6143 #define CP_MEC_TABLE_OFFSET 4096
6144
6145 void cik_init_cp_pg_table(struct radeon_device *rdev)
6146 {
6147 const __be32 *fw_data;
6148 volatile u32 *dst_ptr;
6149 int me, i, max_me = 4;
6150 u32 bo_offset = 0;
6151 u32 table_offset;
6152
6153 if (rdev->family == CHIP_KAVERI)
6154 max_me = 5;
6155
6156 if (rdev->rlc.cp_table_ptr == NULL)
6157 return;
6158
6159 /* write the cp table buffer */
6160 dst_ptr = rdev->rlc.cp_table_ptr;
6161 for (me = 0; me < max_me; me++) {
6162 if (me == 0) {
6163 fw_data = (const __be32 *)rdev->ce_fw->data;
6164 table_offset = CP_ME_TABLE_OFFSET;
6165 } else if (me == 1) {
6166 fw_data = (const __be32 *)rdev->pfp_fw->data;
6167 table_offset = CP_ME_TABLE_OFFSET;
6168 } else if (me == 2) {
6169 fw_data = (const __be32 *)rdev->me_fw->data;
6170 table_offset = CP_ME_TABLE_OFFSET;
6171 } else {
6172 fw_data = (const __be32 *)rdev->mec_fw->data;
6173 table_offset = CP_MEC_TABLE_OFFSET;
6174 }
6175
6176 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6177 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
6178 }
6179 bo_offset += CP_ME_TABLE_SIZE;
6180 }
6181 }
6182
6183 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6184 bool enable)
6185 {
6186 u32 data, orig;
6187
6188 if (enable) {
6189 orig = data = RREG32(RLC_PG_CNTL);
6190 data |= GFX_PG_ENABLE;
6191 if (orig != data)
6192 WREG32(RLC_PG_CNTL, data);
6193
6194 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6195 data |= AUTO_PG_EN;
6196 if (orig != data)
6197 WREG32(RLC_AUTO_PG_CTRL, data);
6198 } else {
6199 orig = data = RREG32(RLC_PG_CNTL);
6200 data &= ~GFX_PG_ENABLE;
6201 if (orig != data)
6202 WREG32(RLC_PG_CNTL, data);
6203
6204 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6205 data &= ~AUTO_PG_EN;
6206 if (orig != data)
6207 WREG32(RLC_AUTO_PG_CTRL, data);
6208
6209 data = RREG32(DB_RENDER_CONTROL);
6210 }
6211 }
6212
6213 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6214 {
6215 u32 mask = 0, tmp, tmp1;
6216 int i;
6217
6218 cik_select_se_sh(rdev, se, sh);
6219 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6220 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6221 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6222
6223 tmp &= 0xffff0000;
6224
6225 tmp |= tmp1;
6226 tmp >>= 16;
6227
6228 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6229 mask <<= 1;
6230 mask |= 1;
6231 }
6232
6233 return (~tmp) & mask;
6234 }
6235
6236 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6237 {
6238 u32 i, j, k, active_cu_number = 0;
6239 u32 mask, counter, cu_bitmap;
6240 u32 tmp = 0;
6241
6242 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6243 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6244 mask = 1;
6245 cu_bitmap = 0;
6246 counter = 0;
6247 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6248 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6249 if (counter < 2)
6250 cu_bitmap |= mask;
6251 counter++;
6252 }
6253 mask <<= 1;
6254 }
6255
6256 active_cu_number += counter;
6257 tmp |= (cu_bitmap << (i * 16 + j * 8));
6258 }
6259 }
6260
6261 WREG32(RLC_PG_AO_CU_MASK, tmp);
6262
6263 tmp = RREG32(RLC_MAX_PG_CU);
6264 tmp &= ~MAX_PU_CU_MASK;
6265 tmp |= MAX_PU_CU(active_cu_number);
6266 WREG32(RLC_MAX_PG_CU, tmp);
6267 }
6268
6269 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6270 bool enable)
6271 {
6272 u32 data, orig;
6273
6274 orig = data = RREG32(RLC_PG_CNTL);
6275 if (enable)
6276 data |= STATIC_PER_CU_PG_ENABLE;
6277 else
6278 data &= ~STATIC_PER_CU_PG_ENABLE;
6279 if (orig != data)
6280 WREG32(RLC_PG_CNTL, data);
6281 }
6282
6283 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6284 bool enable)
6285 {
6286 u32 data, orig;
6287
6288 orig = data = RREG32(RLC_PG_CNTL);
6289 if (enable)
6290 data |= DYN_PER_CU_PG_ENABLE;
6291 else
6292 data &= ~DYN_PER_CU_PG_ENABLE;
6293 if (orig != data)
6294 WREG32(RLC_PG_CNTL, data);
6295 }
6296
6297 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6298 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6299
6300 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6301 {
6302 u32 data, orig;
6303 u32 i;
6304
6305 if (rdev->rlc.cs_data) {
6306 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6307 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6308 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
6309 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6310 } else {
6311 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6312 for (i = 0; i < 3; i++)
6313 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6314 }
6315 if (rdev->rlc.reg_list) {
6316 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6317 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6318 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6319 }
6320
6321 orig = data = RREG32(RLC_PG_CNTL);
6322 data |= GFX_PG_SRC;
6323 if (orig != data)
6324 WREG32(RLC_PG_CNTL, data);
6325
6326 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6327 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6328
6329 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6330 data &= ~IDLE_POLL_COUNT_MASK;
6331 data |= IDLE_POLL_COUNT(0x60);
6332 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6333
6334 data = 0x10101010;
6335 WREG32(RLC_PG_DELAY, data);
6336
6337 data = RREG32(RLC_PG_DELAY_2);
6338 data &= ~0xff;
6339 data |= 0x3;
6340 WREG32(RLC_PG_DELAY_2, data);
6341
6342 data = RREG32(RLC_AUTO_PG_CTRL);
6343 data &= ~GRBM_REG_SGIT_MASK;
6344 data |= GRBM_REG_SGIT(0x700);
6345 WREG32(RLC_AUTO_PG_CTRL, data);
6347 }
6348
6349 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6350 {
6351 bool has_pg = false;
6352 bool has_dyn_mgpg = false;
6353 bool has_static_mgpg = false;
6354
6355 /* only APUs have PG */
6356 if (rdev->flags & RADEON_IS_IGP) {
6357 has_pg = true;
6358 has_static_mgpg = true;
6359 if (rdev->family == CHIP_KAVERI)
6360 has_dyn_mgpg = true;
6361 }
6362
6363 if (has_pg) {
6364 cik_enable_gfx_cgpg(rdev, enable);
6365 if (enable) {
6366 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
6367 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
6368 } else {
6369 cik_enable_gfx_static_mgpg(rdev, false);
6370 cik_enable_gfx_dynamic_mgpg(rdev, false);
6371 }
6372 }
6374 }
6375
6376 void cik_init_pg(struct radeon_device *rdev)
6377 {
6378 bool has_pg = false;
6379
6380 /* only APUs have PG */
6381 if (rdev->flags & RADEON_IS_IGP) {
6382 /* XXX disable this for now */
6383 /* has_pg = true; */
6384 }
6385
6386 if (has_pg) {
6387 cik_enable_sck_slowdown_on_pu(rdev, true);
6388 cik_enable_sck_slowdown_on_pd(rdev, true);
6389 cik_init_gfx_cgpg(rdev);
6390 cik_enable_cp_pg(rdev, true);
6391 cik_enable_gds_pg(rdev, true);
6392 cik_init_ao_cu_mask(rdev);
6393 cik_update_gfx_pg(rdev, true);
6394 }
6395 }
6396
6397 /*
6398 * Interrupts
6399 * Starting with r6xx, interrupts are handled via a ring buffer.
6400 * Ring buffers are areas of GPU accessible memory that the GPU
6401 * writes interrupt vectors into and the host reads vectors out of.
6402 * There is a rptr (read pointer) that determines where the
6403 * host is currently reading, and a wptr (write pointer)
6404 * which determines where the GPU has written. When the
6405 * pointers are equal, the ring is idle. When the GPU
6406 * writes vectors to the ring buffer, it increments the
6407 * wptr. When there is an interrupt, the host then starts
6408 * fetching vectors and processing them until the pointers are
6409 * equal again, at which point it updates the rptr.
6410 */
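
/*
 * A minimal host-side consumption sketch (illustrative only, not driver
 * code): 'ring', 'rptr', 'wptr' and 'ptr_mask' stand in for the
 * corresponding rdev->ih fields used by cik_irq_process() below, and
 * handle_vector() is a hypothetical dispatcher. rptr/wptr are byte
 * offsets and each IV entry occupies 16 bytes:
 *
 *   while (rptr != wptr) {
 *           u32 src_id = le32_to_cpu(ring[rptr / 4]) & 0xff;
 *           handle_vector(src_id);          <- dispatch on source id
 *           rptr = (rptr + 16) & ptr_mask;  <- advance one 16-byte entry
 *   }
 *   WREG32(IH_RB_RPTR, rptr);               <- return entries to the GPU
 */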
6411
6412 /**
6413 * cik_enable_interrupts - Enable the interrupt ring buffer
6414 *
6415 * @rdev: radeon_device pointer
6416 *
6417 * Enable the interrupt ring buffer (CIK).
6418 */
6419 static void cik_enable_interrupts(struct radeon_device *rdev)
6420 {
6421 u32 ih_cntl = RREG32(IH_CNTL);
6422 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6423
6424 ih_cntl |= ENABLE_INTR;
6425 ih_rb_cntl |= IH_RB_ENABLE;
6426 WREG32(IH_CNTL, ih_cntl);
6427 WREG32(IH_RB_CNTL, ih_rb_cntl);
6428 rdev->ih.enabled = true;
6429 }
6430
6431 /**
6432 * cik_disable_interrupts - Disable the interrupt ring buffer
6433 *
6434 * @rdev: radeon_device pointer
6435 *
6436 * Disable the interrupt ring buffer (CIK).
6437 */
6438 static void cik_disable_interrupts(struct radeon_device *rdev)
6439 {
6440 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6441 u32 ih_cntl = RREG32(IH_CNTL);
6442
6443 ih_rb_cntl &= ~IH_RB_ENABLE;
6444 ih_cntl &= ~ENABLE_INTR;
6445 WREG32(IH_RB_CNTL, ih_rb_cntl);
6446 WREG32(IH_CNTL, ih_cntl);
6447 /* set rptr, wptr to 0 */
6448 WREG32(IH_RB_RPTR, 0);
6449 WREG32(IH_RB_WPTR, 0);
6450 rdev->ih.enabled = false;
6451 rdev->ih.rptr = 0;
6452 }
6453
6454 /**
6455 * cik_disable_interrupt_state - Disable all interrupt sources
6456 *
6457 * @rdev: radeon_device pointer
6458 *
6459 * Clear all interrupt enable bits used by the driver (CIK).
6460 */
6461 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6462 {
6463 u32 tmp;
6464
6465 /* gfx ring */
6466 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6467 /* sdma */
6468 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6469 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6470 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6471 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6472 /* compute queues */
6473 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6474 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6475 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6476 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6477 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6478 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6479 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6480 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6481 /* grbm */
6482 WREG32(GRBM_INT_CNTL, 0);
6483 /* vline/vblank, etc. */
6484 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6485 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6486 if (rdev->num_crtc >= 4) {
6487 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6488 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6489 }
6490 if (rdev->num_crtc >= 6) {
6491 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6492 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6493 }
6494
6495 /* dac hotplug */
6496 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6497
6498 /* digital hotplug */
6499 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6500 WREG32(DC_HPD1_INT_CONTROL, tmp);
6501 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6502 WREG32(DC_HPD2_INT_CONTROL, tmp);
6503 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6504 WREG32(DC_HPD3_INT_CONTROL, tmp);
6505 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6506 WREG32(DC_HPD4_INT_CONTROL, tmp);
6507 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6508 WREG32(DC_HPD5_INT_CONTROL, tmp);
6509 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6510 WREG32(DC_HPD6_INT_CONTROL, tmp);
6512 }
6513
6514 /**
6515 * cik_irq_init - init and enable the interrupt ring
6516 *
6517 * @rdev: radeon_device pointer
6518 *
6519 * Allocate a ring buffer for the interrupt controller,
6520 * enable the RLC, disable interrupts, enable the IH
6521 * ring buffer and enable it (CIK).
6522 * Called at device load and resume.
6523 * Returns 0 for success, errors for failure.
6524 */
6525 static int cik_irq_init(struct radeon_device *rdev)
6526 {
6527 int ret = 0;
6528 int rb_bufsz;
6529 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6530
6531 /* allocate ring */
6532 ret = r600_ih_ring_alloc(rdev);
6533 if (ret)
6534 return ret;
6535
6536 /* disable irqs */
6537 cik_disable_interrupts(rdev);
6538
6539 /* init rlc */
6540 ret = cik_rlc_resume(rdev);
6541 if (ret) {
6542 r600_ih_ring_fini(rdev);
6543 return ret;
6544 }
6545
6546 /* setup interrupt control */
6547 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6548 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6549 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6550 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6551 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6552 */
6553 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6554 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6555 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6556 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6557
6558 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6559 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
6560
6561 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6562 IH_WPTR_OVERFLOW_CLEAR |
6563 (rb_bufsz << 1));
6564
6565 if (rdev->wb.enabled)
6566 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6567
6568 /* set the writeback address whether it's enabled or not */
6569 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6570 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6571
6572 WREG32(IH_RB_CNTL, ih_rb_cntl);
6573
6574 /* set rptr, wptr to 0 */
6575 WREG32(IH_RB_RPTR, 0);
6576 WREG32(IH_RB_WPTR, 0);
6577
6578 /* Default settings for IH_CNTL (disabled at first) */
6579 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6580 /* RPTR_REARM only works if msi's are enabled */
6581 if (rdev->msi_enabled)
6582 ih_cntl |= RPTR_REARM;
6583 WREG32(IH_CNTL, ih_cntl);
6584
6585 /* force the active interrupt state to all disabled */
6586 cik_disable_interrupt_state(rdev);
6587
6588 pci_set_master(rdev->pdev);
6589
6590 /* enable irqs */
6591 cik_enable_interrupts(rdev);
6592
6593 return ret;
6594 }
6595
6596 /**
6597 * cik_irq_set - enable/disable interrupt sources
6598 *
6599 * @rdev: radeon_device pointer
6600 *
6601 * Enable interrupt sources on the GPU (vblanks, hpd,
6602 * etc.) (CIK).
6603 * Returns 0 for success, errors for failure.
6604 */
6605 int cik_irq_set(struct radeon_device *rdev)
6606 {
6607 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6608 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6609 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6610 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6611 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6612 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6613 u32 grbm_int_cntl = 0;
6614 u32 dma_cntl, dma_cntl1;
6615 u32 thermal_int;
6616
6617 if (!rdev->irq.installed) {
6618 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6619 return -EINVAL;
6620 }
6621 /* don't enable anything if the ih is disabled */
6622 if (!rdev->ih.enabled) {
6623 cik_disable_interrupts(rdev);
6624 /* force the active interrupt state to all disabled */
6625 cik_disable_interrupt_state(rdev);
6626 return 0;
6627 }
6628
6629 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6630 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6631 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6632 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6633 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6634 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6635
6636 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6637 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6638
6639 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6640 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6641 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6642 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6643 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6644 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6645 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6646 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6647
6648 if (rdev->flags & RADEON_IS_IGP)
6649 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6650 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6651 else
6652 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6653 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6654
6655 /* enable CP interrupts on all rings */
6656 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6657 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6658 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6659 }
6660 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6661 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6662 DRM_DEBUG("si_irq_set: sw int cp1\n");
6663 if (ring->me == 1) {
6664 switch (ring->pipe) {
6665 case 0:
6666 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6667 break;
6668 case 1:
6669 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6670 break;
6671 case 2:
6672 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6673 break;
6674 case 3:
6675 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6676 break;
6677 default:
6678 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6679 break;
6680 }
6681 } else if (ring->me == 2) {
6682 switch (ring->pipe) {
6683 case 0:
6684 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6685 break;
6686 case 1:
6687 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6688 break;
6689 case 2:
6690 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6691 break;
6692 case 3:
6693 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6694 break;
6695 default:
6696 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6697 break;
6698 }
6699 } else {
6700 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6701 }
6702 }
6703 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6704 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6705 DRM_DEBUG("si_irq_set: sw int cp2\n");
6706 if (ring->me == 1) {
6707 switch (ring->pipe) {
6708 case 0:
6709 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6710 break;
6711 case 1:
6712 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6713 break;
6714 case 2:
6715 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6716 break;
6717 case 3:
6718 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6719 break;
6720 default:
6721 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6722 break;
6723 }
6724 } else if (ring->me == 2) {
6725 switch (ring->pipe) {
6726 case 0:
6727 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6728 break;
6729 case 1:
6730 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6731 break;
6732 case 2:
6733 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6734 break;
6735 case 3:
6736 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6737 break;
6738 default:
6739 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6740 break;
6741 }
6742 } else {
6743 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6744 }
6745 }
6746
6747 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6748 DRM_DEBUG("cik_irq_set: sw int dma\n");
6749 dma_cntl |= TRAP_ENABLE;
6750 }
6751
6752 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6753 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6754 dma_cntl1 |= TRAP_ENABLE;
6755 }
6756
6757 if (rdev->irq.crtc_vblank_int[0] ||
6758 atomic_read(&rdev->irq.pflip[0])) {
6759 DRM_DEBUG("cik_irq_set: vblank 0\n");
6760 crtc1 |= VBLANK_INTERRUPT_MASK;
6761 }
6762 if (rdev->irq.crtc_vblank_int[1] ||
6763 atomic_read(&rdev->irq.pflip[1])) {
6764 DRM_DEBUG("cik_irq_set: vblank 1\n");
6765 crtc2 |= VBLANK_INTERRUPT_MASK;
6766 }
6767 if (rdev->irq.crtc_vblank_int[2] ||
6768 atomic_read(&rdev->irq.pflip[2])) {
6769 DRM_DEBUG("cik_irq_set: vblank 2\n");
6770 crtc3 |= VBLANK_INTERRUPT_MASK;
6771 }
6772 if (rdev->irq.crtc_vblank_int[3] ||
6773 atomic_read(&rdev->irq.pflip[3])) {
6774 DRM_DEBUG("cik_irq_set: vblank 3\n");
6775 crtc4 |= VBLANK_INTERRUPT_MASK;
6776 }
6777 if (rdev->irq.crtc_vblank_int[4] ||
6778 atomic_read(&rdev->irq.pflip[4])) {
6779 DRM_DEBUG("cik_irq_set: vblank 4\n");
6780 crtc5 |= VBLANK_INTERRUPT_MASK;
6781 }
6782 if (rdev->irq.crtc_vblank_int[5] ||
6783 atomic_read(&rdev->irq.pflip[5])) {
6784 DRM_DEBUG("cik_irq_set: vblank 5\n");
6785 crtc6 |= VBLANK_INTERRUPT_MASK;
6786 }
6787 if (rdev->irq.hpd[0]) {
6788 DRM_DEBUG("cik_irq_set: hpd 1\n");
6789 hpd1 |= DC_HPDx_INT_EN;
6790 }
6791 if (rdev->irq.hpd[1]) {
6792 DRM_DEBUG("cik_irq_set: hpd 2\n");
6793 hpd2 |= DC_HPDx_INT_EN;
6794 }
6795 if (rdev->irq.hpd[2]) {
6796 DRM_DEBUG("cik_irq_set: hpd 3\n");
6797 hpd3 |= DC_HPDx_INT_EN;
6798 }
6799 if (rdev->irq.hpd[3]) {
6800 DRM_DEBUG("cik_irq_set: hpd 4\n");
6801 hpd4 |= DC_HPDx_INT_EN;
6802 }
6803 if (rdev->irq.hpd[4]) {
6804 DRM_DEBUG("cik_irq_set: hpd 5\n");
6805 hpd5 |= DC_HPDx_INT_EN;
6806 }
6807 if (rdev->irq.hpd[5]) {
6808 DRM_DEBUG("cik_irq_set: hpd 6\n");
6809 hpd6 |= DC_HPDx_INT_EN;
6810 }
6811
6812 if (rdev->irq.dpm_thermal) {
6813 DRM_DEBUG("dpm thermal\n");
6814 if (rdev->flags & RADEON_IS_IGP)
6815 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6816 else
6817 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6818 }
6819
6820 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6821
6822 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6823 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6824
6825 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6826 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6827 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6828 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6829 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6830 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6831 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6832 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6833
6834 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6835
6836 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6837 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6838 if (rdev->num_crtc >= 4) {
6839 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6840 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6841 }
6842 if (rdev->num_crtc >= 6) {
6843 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6844 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6845 }
6846
6847 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6848 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6849 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6850 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6851 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6852 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6853
6854 if (rdev->flags & RADEON_IS_IGP)
6855 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6856 else
6857 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6858
6859 return 0;
6860 }
6861
6862 /**
6863 * cik_irq_ack - ack interrupt sources
6864 *
6865 * @rdev: radeon_device pointer
6866 *
6867 * Ack interrupt sources on the GPU (vblanks, hpd,
6868 * etc.) (CIK). Certain interrupts sources are sw
6869 * generated and do not require an explicit ack.
6870 */
6871 static inline void cik_irq_ack(struct radeon_device *rdev)
6872 {
6873 u32 tmp;
6874
6875 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6876 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6877 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6878 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6879 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6880 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6881 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6882
6883 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6884 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6885 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6886 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6887 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6888 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6889 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6890 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6891
6892 if (rdev->num_crtc >= 4) {
6893 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6894 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6895 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6896 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6897 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6898 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6899 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6900 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6901 }
6902
6903 if (rdev->num_crtc >= 6) {
6904 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6905 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6906 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6907 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6908 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6909 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6910 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6911 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6912 }
6913
6914 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6915 tmp = RREG32(DC_HPD1_INT_CONTROL);
6916 tmp |= DC_HPDx_INT_ACK;
6917 WREG32(DC_HPD1_INT_CONTROL, tmp);
6918 }
6919 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6920 tmp = RREG32(DC_HPD2_INT_CONTROL);
6921 tmp |= DC_HPDx_INT_ACK;
6922 WREG32(DC_HPD2_INT_CONTROL, tmp);
6923 }
6924 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6925 tmp = RREG32(DC_HPD3_INT_CONTROL);
6926 tmp |= DC_HPDx_INT_ACK;
6927 WREG32(DC_HPD3_INT_CONTROL, tmp);
6928 }
6929 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6930 tmp = RREG32(DC_HPD4_INT_CONTROL);
6931 tmp |= DC_HPDx_INT_ACK;
6932 WREG32(DC_HPD4_INT_CONTROL, tmp);
6933 }
6934 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6935 tmp = RREG32(DC_HPD5_INT_CONTROL);
6936 tmp |= DC_HPDx_INT_ACK;
6937 WREG32(DC_HPD5_INT_CONTROL, tmp);
6938 }
6939 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6940 tmp = RREG32(DC_HPD6_INT_CONTROL);
6941 tmp |= DC_HPDx_INT_ACK;
6942 WREG32(DC_HPD6_INT_CONTROL, tmp);
6943 }
6944 }
6945
6946 /**
6947 * cik_irq_disable - disable interrupts
6948 *
6949 * @rdev: radeon_device pointer
6950 *
6951 * Disable interrupts on the hw (CIK).
6952 */
6953 static void cik_irq_disable(struct radeon_device *rdev)
6954 {
6955 cik_disable_interrupts(rdev);
6956 /* Wait and acknowledge irq */
6957 mdelay(1);
6958 cik_irq_ack(rdev);
6959 cik_disable_interrupt_state(rdev);
6960 }
6961
6962 /**
6963 * cik_irq_suspend - disable interrupts for suspend
6964 *
6965 * @rdev: radeon_device pointer
6966 *
6967 * Disable interrupts and stop the RLC (CIK).
6968 * Used for suspend.
6969 */
6970 static void cik_irq_suspend(struct radeon_device *rdev)
6971 {
6972 cik_irq_disable(rdev);
6973 cik_rlc_stop(rdev);
6974 }
6975
6976 /**
6977 * cik_irq_fini - tear down interrupt support
6978 *
6979 * @rdev: radeon_device pointer
6980 *
6981 * Disable interrupts on the hw and free the IH ring
6982 * buffer (CIK).
6983 * Used for driver unload.
6984 */
6985 static void cik_irq_fini(struct radeon_device *rdev)
6986 {
6987 cik_irq_suspend(rdev);
6988 r600_ih_ring_fini(rdev);
6989 }
6990
6991 /**
6992 * cik_get_ih_wptr - get the IH ring buffer wptr
6993 *
6994 * @rdev: radeon_device pointer
6995 *
6996 * Get the IH ring buffer wptr from either the register
6997 * or the writeback memory buffer (CIK). Also check for
6998 * ring buffer overflow and deal with it.
6999 * Used by cik_irq_process().
7000 * Returns the value of the wptr.
7001 */
7002 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7003 {
7004 u32 wptr, tmp;
7005
7006 if (rdev->wb.enabled)
7007 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7008 else
7009 wptr = RREG32(IH_RB_WPTR);
7010
7011 if (wptr & RB_OVERFLOW) {
7012 /* When a ring buffer overflow happens, start parsing interrupts
7013 * from the last not-overwritten vector (wptr + 16). Hopefully
7014 * this should allow us to catch up.
7015 */
7016 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7017 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7018 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7019 tmp = RREG32(IH_RB_CNTL);
7020 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7021 WREG32(IH_RB_CNTL, tmp);
7022 }
7023 return (wptr & rdev->ih.ptr_mask);
7024 }
7025
7026 /* CIK IV Ring
7027 * Each IV ring entry is 128 bits:
7028 * [7:0] - interrupt source id
7029 * [31:8] - reserved
7030 * [59:32] - interrupt source data
7031 * [63:60] - reserved
7032 * [71:64] - RINGID
7033 * CP:
7034 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7035 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7036 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7037 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7038 * PIPE_ID - ME0 0=3D
7039 * - ME1&2 compute dispatcher (4 pipes each)
7040 * SDMA:
7041 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7042 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7043 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7044 * [79:72] - VMID
7045 * [95:80] - PASID
7046 * [127:96] - reserved
7047 */
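/*
 * Illustrative decode of one IV entry per the layout above (a sketch,
 * not driver code; 'dw' is assumed to point at the four dwords of a
 * single entry). The masks mirror those used in cik_irq_process():
 *
 *   src_id   = le32_to_cpu(dw[0]) & 0xff;               bits [7:0]
 *   src_data = le32_to_cpu(dw[1]) & 0xfffffff;          bits [59:32]
 *   ring_id  = le32_to_cpu(dw[2]) & 0xff;               bits [71:64]
 *   vm_id    = (le32_to_cpu(dw[2]) >> 8) & 0xff;        bits [79:72]
 *   pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff;     bits [95:80]
 */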
7048 /**
7049 * cik_irq_process - interrupt handler
7050 *
7051 * @rdev: radeon_device pointer
7052 *
7053 * Interrupt handler (CIK). Walk the IH ring,
7054 * ack interrupts and schedule work to handle
7055 * interrupt events.
7056 * Returns irq process return code.
7057 */
7058 int cik_irq_process(struct radeon_device *rdev)
7059 {
7060 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7061 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7062 u32 wptr;
7063 u32 rptr;
7064 u32 src_id, src_data, ring_id;
7065 u8 me_id, pipe_id, queue_id;
7066 u32 ring_index;
7067 bool queue_hotplug = false;
7068 bool queue_reset = false;
7069 u32 addr, status, mc_client;
7070 bool queue_thermal = false;
7071
7072 if (!rdev->ih.enabled || rdev->shutdown)
7073 return IRQ_NONE;
7074
7075 wptr = cik_get_ih_wptr(rdev);
7076
7077 restart_ih:
7078 /* is somebody else already processing irqs? */
7079 if (atomic_xchg(&rdev->ih.lock, 1))
7080 return IRQ_NONE;
7081
7082 rptr = rdev->ih.rptr;
7083 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7084
7085 /* Order reading of wptr vs. reading of IH ring data */
7086 rmb();
7087
7088 /* display interrupts */
7089 cik_irq_ack(rdev);
7090
7091 while (rptr != wptr) {
7092 /* wptr/rptr are in bytes! */
7093 ring_index = rptr / 4;
7094 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7095 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7096 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7097
7098 switch (src_id) {
7099 case 1: /* D1 vblank/vline */
7100 switch (src_data) {
7101 case 0: /* D1 vblank */
7102 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7103 if (rdev->irq.crtc_vblank_int[0]) {
7104 drm_handle_vblank(rdev->ddev, 0);
7105 rdev->pm.vblank_sync = true;
7106 wake_up(&rdev->irq.vblank_queue);
7107 }
7108 if (atomic_read(&rdev->irq.pflip[0]))
7109 radeon_crtc_handle_flip(rdev, 0);
7110 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7111 DRM_DEBUG("IH: D1 vblank\n");
7112 }
7113 break;
7114 case 1: /* D1 vline */
7115 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7116 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7117 DRM_DEBUG("IH: D1 vline\n");
7118 }
7119 break;
7120 default:
7121 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7122 break;
7123 }
7124 break;
7125 case 2: /* D2 vblank/vline */
7126 switch (src_data) {
7127 case 0: /* D2 vblank */
7128 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7129 if (rdev->irq.crtc_vblank_int[1]) {
7130 drm_handle_vblank(rdev->ddev, 1);
7131 rdev->pm.vblank_sync = true;
7132 wake_up(&rdev->irq.vblank_queue);
7133 }
7134 if (atomic_read(&rdev->irq.pflip[1]))
7135 radeon_crtc_handle_flip(rdev, 1);
7136 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7137 DRM_DEBUG("IH: D2 vblank\n");
7138 }
7139 break;
7140 case 1: /* D2 vline */
7141 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7142 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7143 DRM_DEBUG("IH: D2 vline\n");
7144 }
7145 break;
7146 default:
7147 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7148 break;
7149 }
7150 break;
7151 case 3: /* D3 vblank/vline */
7152 switch (src_data) {
7153 case 0: /* D3 vblank */
7154 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7155 if (rdev->irq.crtc_vblank_int[2]) {
7156 drm_handle_vblank(rdev->ddev, 2);
7157 rdev->pm.vblank_sync = true;
7158 wake_up(&rdev->irq.vblank_queue);
7159 }
7160 if (atomic_read(&rdev->irq.pflip[2]))
7161 radeon_crtc_handle_flip(rdev, 2);
7162 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7163 DRM_DEBUG("IH: D3 vblank\n");
7164 }
7165 break;
7166 case 1: /* D3 vline */
7167 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7168 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7169 DRM_DEBUG("IH: D3 vline\n");
7170 }
7171 break;
7172 default:
7173 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7174 break;
7175 }
7176 break;
7177 case 4: /* D4 vblank/vline */
7178 switch (src_data) {
7179 case 0: /* D4 vblank */
7180 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7181 if (rdev->irq.crtc_vblank_int[3]) {
7182 drm_handle_vblank(rdev->ddev, 3);
7183 rdev->pm.vblank_sync = true;
7184 wake_up(&rdev->irq.vblank_queue);
7185 }
7186 if (atomic_read(&rdev->irq.pflip[3]))
7187 radeon_crtc_handle_flip(rdev, 3);
7188 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7189 DRM_DEBUG("IH: D4 vblank\n");
7190 }
7191 break;
7192 case 1: /* D4 vline */
7193 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7194 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7195 DRM_DEBUG("IH: D4 vline\n");
7196 }
7197 break;
7198 default:
7199 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7200 break;
7201 }
7202 break;
7203 case 5: /* D5 vblank/vline */
7204 switch (src_data) {
7205 case 0: /* D5 vblank */
7206 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7207 if (rdev->irq.crtc_vblank_int[4]) {
7208 drm_handle_vblank(rdev->ddev, 4);
7209 rdev->pm.vblank_sync = true;
7210 wake_up(&rdev->irq.vblank_queue);
7211 }
7212 if (atomic_read(&rdev->irq.pflip[4]))
7213 radeon_crtc_handle_flip(rdev, 4);
7214 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7215 DRM_DEBUG("IH: D5 vblank\n");
7216 }
7217 break;
7218 case 1: /* D5 vline */
7219 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7220 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7221 DRM_DEBUG("IH: D5 vline\n");
7222 }
7223 break;
7224 default:
7225 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7226 break;
7227 }
7228 break;
7229 case 6: /* D6 vblank/vline */
7230 switch (src_data) {
7231 case 0: /* D6 vblank */
7232 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7233 if (rdev->irq.crtc_vblank_int[5]) {
7234 drm_handle_vblank(rdev->ddev, 5);
7235 rdev->pm.vblank_sync = true;
7236 wake_up(&rdev->irq.vblank_queue);
7237 }
7238 if (atomic_read(&rdev->irq.pflip[5]))
7239 radeon_crtc_handle_flip(rdev, 5);
7240 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7241 DRM_DEBUG("IH: D6 vblank\n");
7242 }
7243 break;
7244 case 1: /* D6 vline */
7245 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7246 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7247 DRM_DEBUG("IH: D6 vline\n");
7248 }
7249 break;
7250 default:
7251 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7252 break;
7253 }
7254 break;
7255 case 42: /* HPD hotplug */
7256 switch (src_data) {
7257 case 0:
7258 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7259 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7260 queue_hotplug = true;
7261 DRM_DEBUG("IH: HPD1\n");
7262 }
7263 break;
7264 case 1:
7265 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7266 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7267 queue_hotplug = true;
7268 DRM_DEBUG("IH: HPD2\n");
7269 }
7270 break;
7271 case 2:
7272 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7273 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7274 queue_hotplug = true;
7275 DRM_DEBUG("IH: HPD3\n");
7276 }
7277 break;
7278 case 3:
7279 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7280 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7281 queue_hotplug = true;
7282 DRM_DEBUG("IH: HPD4\n");
7283 }
7284 break;
7285 case 4:
7286 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7287 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7288 queue_hotplug = true;
7289 DRM_DEBUG("IH: HPD5\n");
7290 }
7291 break;
7292 case 5:
7293 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7294 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7295 queue_hotplug = true;
7296 DRM_DEBUG("IH: HPD6\n");
7297 }
7298 break;
7299 default:
7300 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7301 break;
7302 }
7303 break;
7304 case 146:
7305 case 147:
7306 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7307 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7308 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7309 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7310 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7311 addr);
7312 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7313 status);
7314 cik_vm_decode_fault(rdev, status, addr, mc_client);
7315 /* reset addr and status */
7316 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7317 break;
7318 case 176: /* GFX RB CP_INT */
7319 case 177: /* GFX IB CP_INT */
7320 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7321 break;
7322 case 181: /* CP EOP event */
7323 DRM_DEBUG("IH: CP EOP\n");
7324 /* XXX check the bitfield order! */
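/* assumed layout, per the XXX above: bits [6:5] = ME, [4:3] = pipe, [2:0] = queue */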
7325 me_id = (ring_id & 0x60) >> 5;
7326 pipe_id = (ring_id & 0x18) >> 3;
7327 queue_id = (ring_id & 0x7) >> 0;
7328 switch (me_id) {
7329 case 0:
7330 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7331 break;
7332 case 1:
7333 case 2:
7334 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7335 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7336 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7337 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7338 break;
7339 }
7340 break;
7341 case 184: /* CP Privileged reg access */
7342 DRM_ERROR("Illegal register access in command stream\n");
7343 /* XXX check the bitfield order! */
7344 me_id = (ring_id & 0x60) >> 5;
7345 pipe_id = (ring_id & 0x18) >> 3;
7346 queue_id = (ring_id & 0x7) >> 0;
7347 switch (me_id) {
7348 case 0:
7349 /* This results in a full GPU reset, but all we need to do is soft
7350 * reset the CP for gfx
7351 */
7352 queue_reset = true;
7353 break;
7354 case 1:
7355 /* XXX compute */
7356 queue_reset = true;
7357 break;
7358 case 2:
7359 /* XXX compute */
7360 queue_reset = true;
7361 break;
7362 }
7363 break;
7364 case 185: /* CP Privileged inst */
7365 DRM_ERROR("Illegal instruction in command stream\n");
7366 /* XXX check the bitfield order! */
7367 me_id = (ring_id & 0x60) >> 5;
7368 pipe_id = (ring_id & 0x18) >> 3;
7369 queue_id = (ring_id & 0x7) >> 0;
7370 switch (me_id) {
7371 case 0:
7372 /* This results in a full GPU reset, but all we need to do is soft
7373 * reset the CP for gfx
7374 */
7375 queue_reset = true;
7376 break;
7377 case 1:
7378 /* XXX compute */
7379 queue_reset = true;
7380 break;
7381 case 2:
7382 /* XXX compute */
7383 queue_reset = true;
7384 break;
7385 }
7386 break;
7387 case 224: /* SDMA trap event */
7388 /* XXX check the bitfield order! */
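/* assumed layout, per the XXX above: bits [1:0] = SDMA engine, [3:2] = queue */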
7389 me_id = (ring_id & 0x3) >> 0;
7390 queue_id = (ring_id & 0xc) >> 2;
7391 DRM_DEBUG("IH: SDMA trap\n");
7392 switch (me_id) {
7393 case 0:
7394 switch (queue_id) {
7395 case 0:
7396 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7397 break;
7398 case 1:
7399 /* XXX compute */
7400 break;
7401 case 2:
7402 /* XXX compute */
7403 break;
7404 }
7405 break;
7406 case 1:
7407 switch (queue_id) {
7408 case 0:
7409 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7410 break;
7411 case 1:
7412 /* XXX compute */
7413 break;
7414 case 2:
7415 /* XXX compute */
7416 break;
7417 }
7418 break;
7419 }
7420 break;
7421 case 230: /* thermal low to high */
7422 DRM_DEBUG("IH: thermal low to high\n");
7423 rdev->pm.dpm.thermal.high_to_low = false;
7424 queue_thermal = true;
7425 break;
7426 case 231: /* thermal high to low */
7427 DRM_DEBUG("IH: thermal high to low\n");
7428 rdev->pm.dpm.thermal.high_to_low = true;
7429 queue_thermal = true;
7430 break;
7431 case 233: /* GUI IDLE */
7432 DRM_DEBUG("IH: GUI idle\n");
7433 break;
7434 case 241: /* SDMA Privileged inst */
7435 case 247: /* SDMA Privileged inst */
7436 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7437 /* XXX check the bitfield order! */
7438 me_id = (ring_id & 0x3) >> 0;
7439 queue_id = (ring_id & 0xc) >> 2;
7440 switch (me_id) {
7441 case 0:
7442 switch (queue_id) {
7443 case 0:
7444 queue_reset = true;
7445 break;
7446 case 1:
7447 /* XXX compute */
7448 queue_reset = true;
7449 break;
7450 case 2:
7451 /* XXX compute */
7452 queue_reset = true;
7453 break;
7454 }
7455 break;
7456 case 1:
7457 switch (queue_id) {
7458 case 0:
7459 queue_reset = true;
7460 break;
7461 case 1:
7462 /* XXX compute */
7463 queue_reset = true;
7464 break;
7465 case 2:
7466 /* XXX compute */
7467 queue_reset = true;
7468 break;
7469 }
7470 break;
7471 }
7472 break;
7473 default:
7474 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7475 break;
7476 }
7477
7478 /* wptr/rptr are in bytes! */
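/* each IH vector is four dwords, hence the 16 byte stride */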
7479 rptr += 16;
7480 rptr &= rdev->ih.ptr_mask;
7481 }
7482 if (queue_hotplug)
7483 schedule_work(&rdev->hotplug_work);
7484 if (queue_reset)
7485 schedule_work(&rdev->reset_work);
7486 if (queue_thermal)
7487 schedule_work(&rdev->pm.dpm.thermal.work);
7488 rdev->ih.rptr = rptr;
7489 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7490 atomic_set(&rdev->ih.lock, 0);
7491
7492 /* make sure wptr hasn't changed while processing */
7493 wptr = cik_get_ih_wptr(rdev);
7494 if (wptr != rptr)
7495 goto restart_ih;
7496
7497 return IRQ_HANDLED;
7498 }
7499
7500 /*
7501 * startup/shutdown callbacks
7502 */
7503 /**
7504 * cik_startup - program the asic to a functional state
7505 *
7506 * @rdev: radeon_device pointer
7507 *
7508 * Programs the asic to a functional state (CIK).
7509 * Called by cik_init() and cik_resume().
7510 * Returns 0 for success, error for failure.
7511 */
7512 static int cik_startup(struct radeon_device *rdev)
7513 {
7514 struct radeon_ring *ring;
7515 int r;
7516
7517 /* enable pcie gen2/3 link */
7518 cik_pcie_gen3_enable(rdev);
7519 /* enable aspm */
7520 cik_program_aspm(rdev);
7521
7522 cik_mc_program(rdev);
7523
7524 if (rdev->flags & RADEON_IS_IGP) {
7525 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7526 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7527 r = cik_init_microcode(rdev);
7528 if (r) {
7529 DRM_ERROR("Failed to load firmware!\n");
7530 return r;
7531 }
7532 }
7533 } else {
7534 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7535 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7536 !rdev->mc_fw) {
7537 r = cik_init_microcode(rdev);
7538 if (r) {
7539 DRM_ERROR("Failed to load firmware!\n");
7540 return r;
7541 }
7542 }
7543
7544 r = ci_mc_load_microcode(rdev);
7545 if (r) {
7546 DRM_ERROR("Failed to load MC firmware!\n");
7547 return r;
7548 }
7549 }
7550
7551 r = r600_vram_scratch_init(rdev);
7552 if (r)
7553 return r;
7554
7555 r = cik_pcie_gart_enable(rdev);
7556 if (r)
7557 return r;
7558 cik_gpu_init(rdev);
7559
7560 /* allocate rlc buffers */
7561 if (rdev->flags & RADEON_IS_IGP) {
7562 if (rdev->family == CHIP_KAVERI) {
7563 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7564 rdev->rlc.reg_list_size =
7565 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7566 } else {
7567 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7568 rdev->rlc.reg_list_size =
7569 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7570 }
7571 }
7572 rdev->rlc.cs_data = ci_cs_data;
7573 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
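/* one table per micro engine; the factor of 5 presumably covers PFP/ME/CE and the two MECs */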
7574 r = sumo_rlc_init(rdev);
7575 if (r) {
7576 DRM_ERROR("Failed to init rlc BOs!\n");
7577 return r;
7578 }
7579
7580 /* allocate wb buffer */
7581 r = radeon_wb_init(rdev);
7582 if (r)
7583 return r;
7584
7585 /* allocate mec buffers */
7586 r = cik_mec_init(rdev);
7587 if (r) {
7588 DRM_ERROR("Failed to init MEC BOs!\n");
7589 return r;
7590 }
7591
7592 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7593 if (r) {
7594 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7595 return r;
7596 }
7597
7598 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7599 if (r) {
7600 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7601 return r;
7602 }
7603
7604 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7605 if (r) {
7606 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7607 return r;
7608 }
7609
7610 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7611 if (r) {
7612 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7613 return r;
7614 }
7615
7616 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7617 if (r) {
7618 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7619 return r;
7620 }
7621
7622 r = cik_uvd_resume(rdev);
7623 if (!r) {
7624 r = radeon_fence_driver_start_ring(rdev,
7625 R600_RING_TYPE_UVD_INDEX);
7626 if (r)
7627 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7628 }
7629 if (r)
7630 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
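/* UVD failures are not fatal; a zero ring size makes the UVD bring-up below a no-op */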
7631
7632 /* Enable IRQ */
7633 if (!rdev->irq.installed) {
7634 r = radeon_irq_kms_init(rdev);
7635 if (r)
7636 return r;
7637 }
7638
7639 r = cik_irq_init(rdev);
7640 if (r) {
7641 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7642 radeon_irq_kms_fini(rdev);
7643 return r;
7644 }
7645 cik_irq_set(rdev);
7646
7647 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7648 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7649 CP_RB0_RPTR, CP_RB0_WPTR,
7650 0, 0xfffff, RADEON_CP_PACKET2);
7651 if (r)
7652 return r;
7653
7654 /* set up the compute queues */
7655 /* type-2 packets are deprecated on MEC, use type-3 instead */
7656 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7657 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7658 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7659 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
7660 if (r)
7661 return r;
7662 ring->me = 1; /* first MEC */
7663 ring->pipe = 0; /* first pipe */
7664 ring->queue = 0; /* first queue */
7665 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7666
7667 /* type-2 packets are deprecated on MEC, use type-3 instead */
7668 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7669 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7670 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7671 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
7672 if (r)
7673 return r;
7674 /* dGPUs only have 1 MEC */
7675 ring->me = 1; /* first MEC */
7676 ring->pipe = 0; /* first pipe */
7677 ring->queue = 1; /* second queue */
7678 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
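/* both compute rings sit on MEC 1, pipe 0; only the queue index differs */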
7679
7680 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7681 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7682 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7683 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7684 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7685 if (r)
7686 return r;
7687
7688 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7689 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7690 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7691 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7692 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7693 if (r)
7694 return r;
7695
7696 r = cik_cp_resume(rdev);
7697 if (r)
7698 return r;
7699
7700 r = cik_sdma_resume(rdev);
7701 if (r)
7702 return r;
7703
7704 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7705 if (ring->ring_size) {
7706 r = radeon_ring_init(rdev, ring, ring->ring_size,
7707 R600_WB_UVD_RPTR_OFFSET,
7708 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7709 0, 0xfffff, RADEON_CP_PACKET2);
7710 if (!r)
7711 r = r600_uvd_init(rdev);
7712 if (r)
7713 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7714 }
7715
7716 r = radeon_ib_pool_init(rdev);
7717 if (r) {
7718 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7719 return r;
7720 }
7721
7722 r = radeon_vm_manager_init(rdev);
7723 if (r) {
7724 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7725 return r;
7726 }
7727
7728 return 0;
7729 }
7730
7731 /**
7732 * cik_resume - resume the asic to a functional state
7733 *
7734 * @rdev: radeon_device pointer
7735 *
7736 * Programs the asic to a functional state (CIK).
7737 * Called at resume.
7738 * Returns 0 for success, error for failure.
7739 */
7740 int cik_resume(struct radeon_device *rdev)
7741 {
7742 int r;
7743
7744 /* post card */
7745 atom_asic_init(rdev->mode_info.atom_context);
7746
7747 /* init golden registers */
7748 cik_init_golden_registers(rdev);
7749
7750 rdev->accel_working = true;
7751 r = cik_startup(rdev);
7752 if (r) {
7753 DRM_ERROR("cik startup failed on resume\n");
7754 rdev->accel_working = false;
7755 return r;
7756 }
7757
7758 return r;
7760 }
7761
7762 /**
7763 * cik_suspend - suspend the asic
7764 *
7765 * @rdev: radeon_device pointer
7766 *
7767 * Bring the chip into a state suitable for suspend (CIK).
7768 * Called at suspend.
7769 * Returns 0 for success.
7770 */
7771 int cik_suspend(struct radeon_device *rdev)
7772 {
7773 radeon_vm_manager_fini(rdev);
7774 cik_cp_enable(rdev, false);
7775 cik_sdma_enable(rdev, false);
7776 r600_uvd_stop(rdev);
7777 radeon_uvd_suspend(rdev);
7778 cik_irq_suspend(rdev);
7779 radeon_wb_disable(rdev);
7780 cik_pcie_gart_disable(rdev);
7781 return 0;
7782 }
7783
7784 /* The plan is to move initialization into this function and to
7785 * use helper functions so that radeon_device_init does little
7786 * more than call asic specific functions. This should also
7787 * allow us to remove a bunch of callbacks, such as vram_info.
7788 */
7790 /**
7791 * cik_init - asic specific driver and hw init
7792 *
7793 * @rdev: radeon_device pointer
7794 *
7795 * Setup asic specific driver variables and program the hw
7796 * to a functional state (CIK).
7797 * Called at driver startup.
7798 * Returns 0 for success, errors for failure.
7799 */
7800 int cik_init(struct radeon_device *rdev)
7801 {
7802 struct radeon_ring *ring;
7803 int r;
7804
7805 /* Read BIOS */
7806 if (!radeon_get_bios(rdev)) {
7807 if (ASIC_IS_AVIVO(rdev))
7808 return -EINVAL;
7809 }
7810 /* Must be an ATOMBIOS */
7811 if (!rdev->is_atom_bios) {
7812 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7813 return -EINVAL;
7814 }
7815 r = radeon_atombios_init(rdev);
7816 if (r)
7817 return r;
7818
7819 /* Post card if necessary */
7820 if (!radeon_card_posted(rdev)) {
7821 if (!rdev->bios) {
7822 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7823 return -EINVAL;
7824 }
7825 DRM_INFO("GPU not posted. posting now...\n");
7826 atom_asic_init(rdev->mode_info.atom_context);
7827 }
7828 /* init golden registers */
7829 cik_init_golden_registers(rdev);
7830 /* Initialize scratch registers */
7831 cik_scratch_init(rdev);
7832 /* Initialize surface registers */
7833 radeon_surface_init(rdev);
7834 /* Initialize clocks */
7835 radeon_get_clock_info(rdev->ddev);
7836
7837 /* Fence driver */
7838 r = radeon_fence_driver_init(rdev);
7839 if (r)
7840 return r;
7841
7842 /* initialize memory controller */
7843 r = cik_mc_init(rdev);
7844 if (r)
7845 return r;
7846 /* Memory manager */
7847 r = radeon_bo_init(rdev);
7848 if (r)
7849 return r;
7850
7851 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7852 ring->ring_obj = NULL;
7853 r600_ring_init(rdev, ring, 1024 * 1024);
7854
7855 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7856 ring->ring_obj = NULL;
7857 r600_ring_init(rdev, ring, 1024 * 1024);
7858 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7859 if (r)
7860 return r;
7861
7862 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7863 ring->ring_obj = NULL;
7864 r600_ring_init(rdev, ring, 1024 * 1024);
7865 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7866 if (r)
7867 return r;
7868
7869 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7870 ring->ring_obj = NULL;
7871 r600_ring_init(rdev, ring, 256 * 1024);
7872
7873 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7874 ring->ring_obj = NULL;
7875 r600_ring_init(rdev, ring, 256 * 1024);
7876
7877 r = radeon_uvd_init(rdev);
7878 if (!r) {
7879 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7880 ring->ring_obj = NULL;
7881 r600_ring_init(rdev, ring, 4096);
7882 }
7883
7884 rdev->ih.ring_obj = NULL;
7885 r600_ih_ring_init(rdev, 64 * 1024);
7886
7887 r = r600_pcie_gart_init(rdev);
7888 if (r)
7889 return r;
7890
7891 rdev->accel_working = true;
7892 r = cik_startup(rdev);
7893 if (r) {
7894 dev_err(rdev->dev, "disabling GPU acceleration\n");
7895 cik_cp_fini(rdev);
7896 cik_sdma_fini(rdev);
7897 cik_irq_fini(rdev);
7898 sumo_rlc_fini(rdev);
7899 cik_mec_fini(rdev);
7900 radeon_wb_fini(rdev);
7901 radeon_ib_pool_fini(rdev);
7902 radeon_vm_manager_fini(rdev);
7903 radeon_irq_kms_fini(rdev);
7904 cik_pcie_gart_fini(rdev);
7905 rdev->accel_working = false;
7906 }
7907
7908 /* Don't start up if the MC ucode is missing.
7909 * The default clocks and voltages before the MC ucode
7910 * is loaded are not sufficient for advanced operations.
7911 */
7912 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7913 DRM_ERROR("radeon: MC ucode required for CIK dGPUs.\n");
7914 return -EINVAL;
7915 }
7916
7917 return 0;
7918 }
7919
7920 /**
7921 * cik_fini - asic specific driver and hw fini
7922 *
7923 * @rdev: radeon_device pointer
7924 *
7925 * Tear down the asic specific driver variables and program the hw
7926 * to an idle state (CIK).
7927 * Called at driver unload.
7928 */
7929 void cik_fini(struct radeon_device *rdev)
7930 {
7931 cik_cp_fini(rdev);
7932 cik_sdma_fini(rdev);
7933 cik_irq_fini(rdev);
7934 sumo_rlc_fini(rdev);
7935 cik_mec_fini(rdev);
7936 radeon_wb_fini(rdev);
7937 radeon_vm_manager_fini(rdev);
7938 radeon_ib_pool_fini(rdev);
7939 radeon_irq_kms_fini(rdev);
7940 r600_uvd_stop(rdev);
7941 radeon_uvd_fini(rdev);
7942 cik_pcie_gart_fini(rdev);
7943 r600_vram_scratch_fini(rdev);
7944 radeon_gem_fini(rdev);
7945 radeon_fence_driver_fini(rdev);
7946 radeon_bo_fini(rdev);
7947 radeon_atombios_fini(rdev);
7948 kfree(rdev->bios);
7949 rdev->bios = NULL;
7950 }
7951
7952 /* display watermark setup */
7953 /**
7954 * dce8_line_buffer_adjust - Set up the line buffer
7955 *
7956 * @rdev: radeon_device pointer
7957 * @radeon_crtc: the selected display controller
7958 * @mode: the current display mode on the selected display
7959 * controller
7960 *
7961 * Set up the line buffer allocation for
7962 * the selected display controller (CIK).
7963 * Returns the line buffer size in pixels.
7964 */
7965 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7966 struct radeon_crtc *radeon_crtc,
7967 struct drm_display_mode *mode)
7968 {
7969 u32 tmp;
7970
7971 /*
7972 * Line Buffer Setup
7973 * There are 6 line buffers, one for each display controller.
7974 * There are 3 partitions per LB. Select the number of partitions
7975 * to enable based on the display width. For display widths larger
7976 * than 4096, you need to use 2 display controllers and combine
7977 * them using the stereo blender.
7978 */
7979 if (radeon_crtc->base.enabled && mode) {
7980 if (mode->crtc_hdisplay < 1920)
7981 tmp = 1;
7982 else if (mode->crtc_hdisplay < 2560)
7983 tmp = 2;
7984 else if (mode->crtc_hdisplay < 4096)
7985 tmp = 0;
7986 else {
7987 DRM_DEBUG_KMS("Mode too big for LB!\n");
7988 tmp = 0;
7989 }
7990 } else
7991 tmp = 1;
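/* tmp picks the LB_MEMORY_CONFIG partitioning; the sizes returned below are two lines at the maximum width that config supports */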
7992
7993 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7994 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7995
7996 if (radeon_crtc->base.enabled && mode) {
7997 switch (tmp) {
7998 case 0:
7999 default:
8000 return 4096 * 2;
8001 case 1:
8002 return 1920 * 2;
8003 case 2:
8004 return 2560 * 2;
8005 }
8006 }
8007
8008 /* controller not enabled, so no lb used */
8009 return 0;
8010 }
8011
8012 /**
8013 * cik_get_number_of_dram_channels - get the number of dram channels
8014 *
8015 * @rdev: radeon_device pointer
8016 *
8017 * Look up the number of video ram channels (CIK).
8018 * Used for display watermark bandwidth calculations
8019 * Returns the number of dram channels
8020 */
8021 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8022 {
8023 u32 tmp = RREG32(MC_SHARED_CHMAP);
8024
8025 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8026 case 0:
8027 default:
8028 return 1;
8029 case 1:
8030 return 2;
8031 case 2:
8032 return 4;
8033 case 3:
8034 return 8;
8035 case 4:
8036 return 3;
8037 case 5:
8038 return 6;
8039 case 6:
8040 return 10;
8041 case 7:
8042 return 12;
8043 case 8:
8044 return 16;
8045 }
8046 }
8047
8048 struct dce8_wm_params {
8049 u32 dram_channels; /* number of dram channels */
8050 u32 yclk; /* bandwidth per dram data pin in kHz */
8051 u32 sclk; /* engine clock in kHz */
8052 u32 disp_clk; /* display clock in kHz */
8053 u32 src_width; /* viewport width */
8054 u32 active_time; /* active display time in ns */
8055 u32 blank_time; /* blank time in ns */
8056 bool interlaced; /* mode is interlaced */
8057 fixed20_12 vsc; /* vertical scale ratio */
8058 u32 num_heads; /* number of active crtcs */
8059 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8060 u32 lb_size; /* line buffer allocated to pipe */
8061 u32 vtaps; /* vertical scaler taps */
8062 };
8063
8064 /**
8065 * dce8_dram_bandwidth - get the dram bandwidth
8066 *
8067 * @wm: watermark calculation data
8068 *
8069 * Calculate the raw dram bandwidth (CIK).
8070 * Used for display watermark bandwidth calculations
8071 * Returns the dram bandwidth in MBytes/s
8072 */
8073 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8074 {
8075 /* Calculate raw DRAM Bandwidth */
8076 fixed20_12 dram_efficiency; /* 0.7 */
8077 fixed20_12 yclk, dram_channels, bandwidth;
8078 fixed20_12 a;
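/* bandwidth in MB/s: (yclk in kHz / 1000) * dram_channels * 4 bytes * 0.7 efficiency */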
8079
8080 a.full = dfixed_const(1000);
8081 yclk.full = dfixed_const(wm->yclk);
8082 yclk.full = dfixed_div(yclk, a);
8083 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8084 a.full = dfixed_const(10);
8085 dram_efficiency.full = dfixed_const(7);
8086 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8087 bandwidth.full = dfixed_mul(dram_channels, yclk);
8088 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8089
8090 return dfixed_trunc(bandwidth);
8091 }
8092
8093 /**
8094 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8095 *
8096 * @wm: watermark calculation data
8097 *
8098 * Calculate the dram bandwidth used for display (CIK).
8099 * Used for display watermark bandwidth calculations
8100 * Returns the dram bandwidth for display in MBytes/s
8101 */
8102 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8103 {
8104 /* Calculate DRAM Bandwidth and the part allocated to display. */
8105 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8106 fixed20_12 yclk, dram_channels, bandwidth;
8107 fixed20_12 a;
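/* same formula as dce8_dram_bandwidth() but with the 0.3 worst case display allocation */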
8108
8109 a.full = dfixed_const(1000);
8110 yclk.full = dfixed_const(wm->yclk);
8111 yclk.full = dfixed_div(yclk, a);
8112 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8113 a.full = dfixed_const(10);
8114 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8115 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8116 bandwidth.full = dfixed_mul(dram_channels, yclk);
8117 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8118
8119 return dfixed_trunc(bandwidth);
8120 }
8121
8122 /**
8123 * dce8_data_return_bandwidth - get the data return bandwidth
8124 *
8125 * @wm: watermark calculation data
8126 *
8127 * Calculate the data return bandwidth used for display (CIK).
8128 * Used for display watermark bandwidth calculations
8129 * Returns the data return bandwidth in MBytes/s
8130 */
8131 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8132 {
8133 /* Calculate the display Data return Bandwidth */
8134 fixed20_12 return_efficiency; /* 0.8 */
8135 fixed20_12 sclk, bandwidth;
8136 fixed20_12 a;
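/* bandwidth in MB/s: (sclk in kHz / 1000) * 32 bytes per cycle * 0.8 efficiency */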
8137
8138 a.full = dfixed_const(1000);
8139 sclk.full = dfixed_const(wm->sclk);
8140 sclk.full = dfixed_div(sclk, a);
8141 a.full = dfixed_const(10);
8142 return_efficiency.full = dfixed_const(8);
8143 return_efficiency.full = dfixed_div(return_efficiency, a);
8144 a.full = dfixed_const(32);
8145 bandwidth.full = dfixed_mul(a, sclk);
8146 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8147
8148 return dfixed_trunc(bandwidth);
8149 }
8150
8151 /**
8152 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8153 *
8154 * @wm: watermark calculation data
8155 *
8156 * Calculate the dmif bandwidth used for display (CIK).
8157 * Used for display watermark bandwidth calculations
8158 * Returns the dmif bandwidth in MBytes/s
8159 */
8160 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8161 {
8162 /* Calculate the DMIF Request Bandwidth */
8163 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8164 fixed20_12 disp_clk, bandwidth;
8165 fixed20_12 a, b;
8166
8167 a.full = dfixed_const(1000);
8168 disp_clk.full = dfixed_const(wm->disp_clk);
8169 disp_clk.full = dfixed_div(disp_clk, a);
8170 a.full = dfixed_const(32);
8171 b.full = dfixed_mul(a, disp_clk);
8172
8173 a.full = dfixed_const(10);
8174 disp_clk_request_efficiency.full = dfixed_const(8);
8175 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8176
8177 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8178
8179 return dfixed_trunc(bandwidth);
8180 }
8181
8182 /**
8183 * dce8_available_bandwidth - get the min available bandwidth
8184 *
8185 * @wm: watermark calculation data
8186 *
8187 * Calculate the min available bandwidth used for display (CIK).
8188 * Used for display watermark bandwidth calculations
8189 * Returns the min available bandwidth in MBytes/s
8190 */
8191 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8192 {
8193 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
8194 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8195 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8196 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8197
8198 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8199 }
8200
8201 /**
8202 * dce8_average_bandwidth - get the average available bandwidth
8203 *
8204 * @wm: watermark calculation data
8205 *
8206 * Calculate the average available bandwidth used for display (CIK).
8207 * Used for display watermark bandwidth calculations
8208 * Returns the average available bandwidth in MBytes/s
8209 */
8210 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8211 {
8212 /* Calculate the display mode Average Bandwidth
8213 * DisplayMode should contain the source and destination dimensions,
8214 * timing, etc.
8215 */
8216 fixed20_12 bpp;
8217 fixed20_12 line_time;
8218 fixed20_12 src_width;
8219 fixed20_12 bandwidth;
8220 fixed20_12 a;
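/* average MB/s: src_width * bytes_per_pixel * vsc / line time in us */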
8221
8222 a.full = dfixed_const(1000);
8223 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8224 line_time.full = dfixed_div(line_time, a);
8225 bpp.full = dfixed_const(wm->bytes_per_pixel);
8226 src_width.full = dfixed_const(wm->src_width);
8227 bandwidth.full = dfixed_mul(src_width, bpp);
8228 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8229 bandwidth.full = dfixed_div(bandwidth, line_time);
8230
8231 return dfixed_trunc(bandwidth);
8232 }
8233
8234 /**
8235 * dce8_latency_watermark - get the latency watermark
8236 *
8237 * @wm: watermark calculation data
8238 *
8239 * Calculate the latency watermark (CIK).
8240 * Used for display watermark bandwidth calculations
8241 * Returns the latency watermark in ns
8242 */
8243 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8244 {
8245 /* First calculate the latency in ns */
8246 u32 mc_latency = 2000; /* 2000 ns. */
8247 u32 available_bandwidth = dce8_available_bandwidth(wm);
8248 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8249 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8250 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8251 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8252 (wm->num_heads * cursor_line_pair_return_time);
8253 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8254 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8255 u32 tmp, dmif_size = 12288;
8256 fixed20_12 a, b, c;
8257
8258 if (wm->num_heads == 0)
8259 return 0;
8260
8261 a.full = dfixed_const(2);
8262 b.full = dfixed_const(1);
8263 if ((wm->vsc.full > a.full) ||
8264 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8265 (wm->vtaps >= 5) ||
8266 ((wm->vsc.full >= a.full) && wm->interlaced))
8267 max_src_lines_per_dst_line = 4;
8268 else
8269 max_src_lines_per_dst_line = 2;
8270
8271 a.full = dfixed_const(available_bandwidth);
8272 b.full = dfixed_const(wm->num_heads);
8273 a.full = dfixed_div(a, b);
8274
8275 b.full = dfixed_const(mc_latency + 512);
8276 c.full = dfixed_const(wm->disp_clk);
8277 b.full = dfixed_div(b, c);
8278
8279 c.full = dfixed_const(dmif_size);
8280 b.full = dfixed_div(c, b);
8281
8282 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8283
8284 b.full = dfixed_const(1000);
8285 c.full = dfixed_const(wm->disp_clk);
8286 b.full = dfixed_div(c, b);
8287 c.full = dfixed_const(wm->bytes_per_pixel);
8288 b.full = dfixed_mul(b, c);
8289
8290 lb_fill_bw = min(tmp, dfixed_trunc(b));
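/* lb_fill_bw: line buffer fill rate in MB/s, the smallest of the per-head share of the available bandwidth, the DMIF limited return rate and disp_clk * bytes_per_pixel */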
8291
8292 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8293 b.full = dfixed_const(1000);
8294 c.full = dfixed_const(lb_fill_bw);
8295 b.full = dfixed_div(c, b);
8296 a.full = dfixed_div(a, b);
8297 line_fill_time = dfixed_trunc(a);
8298
8299 if (line_fill_time < wm->active_time)
8300 return latency;
8301 else
8302 return latency + (line_fill_time - wm->active_time);
8304 }
8305
8306 /**
8307 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8308 * average and available dram bandwidth
8309 *
8310 * @wm: watermark calculation data
8311 *
8312 * Check if the display average bandwidth fits in the display
8313 * dram bandwidth (CIK).
8314 * Used for display watermark bandwidth calculations
8315 * Returns true if the display fits, false if not.
8316 */
8317 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8318 {
8319 if (dce8_average_bandwidth(wm) <=
8320 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8321 return true;
8322 else
8323 return false;
8324 }
8325
8326 /**
8327 * dce8_average_bandwidth_vs_available_bandwidth - check
8328 * average and available bandwidth
8329 *
8330 * @wm: watermark calculation data
8331 *
8332 * Check if the display average bandwidth fits in the display
8333 * available bandwidth (CIK).
8334 * Used for display watermark bandwidth calculations
8335 * Returns true if the display fits, false if not.
8336 */
8337 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8338 {
8339 if (dce8_average_bandwidth(wm) <=
8340 (dce8_available_bandwidth(wm) / wm->num_heads))
8341 return true;
8342 else
8343 return false;
8344 }
8345
8346 /**
8347 * dce8_check_latency_hiding - check latency hiding
8348 *
8349 * @wm: watermark calculation data
8350 *
8351 * Check latency hiding (CIK).
8352 * Used for display watermark bandwidth calculations
8353 * Returns true if the display fits, false if not.
8354 */
8355 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8356 {
8357 u32 lb_partitions = wm->lb_size / wm->src_width;
8358 u32 line_time = wm->active_time + wm->blank_time;
8359 u32 latency_tolerant_lines;
8360 u32 latency_hiding;
8361 fixed20_12 a;
8362
8363 a.full = dfixed_const(1);
8364 if (wm->vsc.full > a.full)
8365 latency_tolerant_lines = 1;
8366 else {
8367 if (lb_partitions <= (wm->vtaps + 1))
8368 latency_tolerant_lines = 1;
8369 else
8370 latency_tolerant_lines = 2;
8371 }
8372
8373 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
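/* the window available to hide latency: the tolerant lines' worth of line time plus the blank period */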
8374
8375 if (dce8_latency_watermark(wm) <= latency_hiding)
8376 return true;
8377 else
8378 return false;
8379 }
8380
8381 /**
8382 * dce8_program_watermarks - program display watermarks
8383 *
8384 * @rdev: radeon_device pointer
8385 * @radeon_crtc: the selected display controller
8386 * @lb_size: line buffer size
8387 * @num_heads: number of display controllers in use
8388 *
8389 * Calculate and program the display watermarks for the
8390 * selected display controller (CIK).
8391 */
8392 static void dce8_program_watermarks(struct radeon_device *rdev,
8393 struct radeon_crtc *radeon_crtc,
8394 u32 lb_size, u32 num_heads)
8395 {
8396 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8397 struct dce8_wm_params wm_low, wm_high;
8398 u32 pixel_period;
8399 u32 line_time = 0;
8400 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8401 u32 tmp, wm_mask;
8402
8403 if (radeon_crtc->base.enabled && num_heads && mode) {
8404 pixel_period = 1000000 / (u32)mode->clock;
8405 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
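/* mode->clock is in kHz, so pixel_period is in ns; line_time is clamped to the 16 bit watermark field */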
8406
8407 /* watermark for high clocks */
8408 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8409 rdev->pm.dpm_enabled) {
8410 wm_high.yclk =
8411 radeon_dpm_get_mclk(rdev, false) * 10;
8412 wm_high.sclk =
8413 radeon_dpm_get_sclk(rdev, false) * 10;
8414 } else {
8415 wm_high.yclk = rdev->pm.current_mclk * 10;
8416 wm_high.sclk = rdev->pm.current_sclk * 10;
8417 }
8418
8419 wm_high.disp_clk = mode->clock;
8420 wm_high.src_width = mode->crtc_hdisplay;
8421 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8422 wm_high.blank_time = line_time - wm_high.active_time;
8423 wm_high.interlaced = false;
8424 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8425 wm_high.interlaced = true;
8426 wm_high.vsc = radeon_crtc->vsc;
8427 wm_high.vtaps = 1;
8428 if (radeon_crtc->rmx_type != RMX_OFF)
8429 wm_high.vtaps = 2;
8430 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8431 wm_high.lb_size = lb_size;
8432 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8433 wm_high.num_heads = num_heads;
8434
8435 /* set for high clocks */
8436 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8437
8438 /* possibly force display priority to high */
8439 /* should really do this at mode validation time... */
8440 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8441 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8442 !dce8_check_latency_hiding(&wm_high) ||
8443 (rdev->disp_priority == 2)) {
8444 DRM_DEBUG_KMS("force priority to high\n");
8445 }
8446
8447 /* watermark for low clocks */
8448 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8449 rdev->pm.dpm_enabled) {
8450 wm_low.yclk =
8451 radeon_dpm_get_mclk(rdev, true) * 10;
8452 wm_low.sclk =
8453 radeon_dpm_get_sclk(rdev, true) * 10;
8454 } else {
8455 wm_low.yclk = rdev->pm.current_mclk * 10;
8456 wm_low.sclk = rdev->pm.current_sclk * 10;
8457 }
8458
8459 wm_low.disp_clk = mode->clock;
8460 wm_low.src_width = mode->crtc_hdisplay;
8461 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8462 wm_low.blank_time = line_time - wm_low.active_time;
8463 wm_low.interlaced = false;
8464 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8465 wm_low.interlaced = true;
8466 wm_low.vsc = radeon_crtc->vsc;
8467 wm_low.vtaps = 1;
8468 if (radeon_crtc->rmx_type != RMX_OFF)
8469 wm_low.vtaps = 2;
8470 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8471 wm_low.lb_size = lb_size;
8472 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8473 wm_low.num_heads = num_heads;
8474
8475 /* set for low clocks */
8476 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8477
8478 /* possibly force display priority to high */
8479 /* should really do this at mode validation time... */
8480 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8481 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8482 !dce8_check_latency_hiding(&wm_low) ||
8483 (rdev->disp_priority == 2)) {
8484 DRM_DEBUG_KMS("force priority to high\n");
8485 }
8486 }
8487
8488 /* select wm A */
8489 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8490 tmp = wm_mask;
8491 tmp &= ~LATENCY_WATERMARK_MASK(3);
8492 tmp |= LATENCY_WATERMARK_MASK(1);
8493 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8494 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8495 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8496 LATENCY_HIGH_WATERMARK(line_time)));
8497 /* select wm B */
8498 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8499 tmp &= ~LATENCY_WATERMARK_MASK(3);
8500 tmp |= LATENCY_WATERMARK_MASK(2);
8501 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8502 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8503 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8504 LATENCY_HIGH_WATERMARK(line_time)));
8505 /* restore original selection */
8506 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8507
8508 /* save values for DPM */
8509 radeon_crtc->line_time = line_time;
8510 radeon_crtc->wm_high = latency_watermark_a;
8511 radeon_crtc->wm_low = latency_watermark_b;
8512 }
8513
8514 /**
8515 * dce8_bandwidth_update - program display watermarks
8516 *
8517 * @rdev: radeon_device pointer
8518 *
8519 * Calculate and program the display watermarks and line
8520 * buffer allocation (CIK).
8521 */
8522 void dce8_bandwidth_update(struct radeon_device *rdev)
8523 {
8524 struct drm_display_mode *mode = NULL;
8525 u32 num_heads = 0, lb_size;
8526 int i;
8527
8528 radeon_update_display_priority(rdev);
8529
8530 for (i = 0; i < rdev->num_crtc; i++) {
8531 if (rdev->mode_info.crtcs[i]->base.enabled)
8532 num_heads++;
8533 }
8534 for (i = 0; i < rdev->num_crtc; i++) {
8535 mode = &rdev->mode_info.crtcs[i]->base.mode;
8536 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8537 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8538 }
8539 }
8540
8541 /**
8542 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8543 *
8544 * @rdev: radeon_device pointer
8545 *
8546 * Fetches a GPU clock counter snapshot (CIK).
8547 * Returns the 64 bit clock counter snapshot.
8548 */
8549 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8550 {
8551 uint64_t clock;
8552
8553 mutex_lock(&rdev->gpu_clock_mutex);
8554 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
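/* writing the capture bit presumably latches the free running counter into the LSB/MSB registers */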
8555 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8556 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8557 mutex_unlock(&rdev->gpu_clock_mutex);
8558 return clock;
8559 }
8560
8561 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8562 u32 cntl_reg, u32 status_reg)
8563 {
8564 int r, i;
8565 struct atom_clock_dividers dividers;
8566 uint32_t tmp;
8567
8568 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8569 clock, false, &dividers);
8570 if (r)
8571 return r;
8572
8573 tmp = RREG32_SMC(cntl_reg);
8574 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8575 tmp |= dividers.post_divider;
8576 WREG32_SMC(cntl_reg, tmp);
8577
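/* wait up to one second (100 * 10 ms) for DCLK_STATUS to report the switch */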
8578 for (i = 0; i < 100; i++) {
8579 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8580 break;
8581 mdelay(10);
8582 }
8583 if (i == 100)
8584 return -ETIMEDOUT;
8585
8586 return 0;
8587 }
8588
8589 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8590 {
8591 int r = 0;
8592
8593 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8594 if (r)
8595 return r;
8596
8597 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8598 return r;
8599 }
8600
8601 int cik_uvd_resume(struct radeon_device *rdev)
8602 {
8603 uint64_t addr;
8604 uint32_t size;
8605 int r;
8606
8607 r = radeon_uvd_resume(rdev);
8608 if (r)
8609 return r;
8610
8611 /* program the VCPU memory controller bits 0-27 */
8612 addr = rdev->uvd.gpu_addr >> 3;
8613 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
8614 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
8615 WREG32(UVD_VCPU_CACHE_SIZE0, size);
8616
8617 addr += size;
8618 size = RADEON_UVD_STACK_SIZE >> 3;
8619 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
8620 WREG32(UVD_VCPU_CACHE_SIZE1, size);
8621
8622 addr += size;
8623 size = RADEON_UVD_HEAP_SIZE >> 3;
8624 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
8625 WREG32(UVD_VCPU_CACHE_SIZE2, size);
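/* the >> 3 above puts offsets and sizes in units of 8 bytes: fw image, then stack, then heap */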
8626
8627 /* bits 28-31 */
8628 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
8629 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
8630
8631 /* bits 32-39 */
8632 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
8633 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
8634
8635 return 0;
8636 }
8637
8638 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8639 {
8640 struct pci_dev *root = rdev->pdev->bus->self;
8641 int bridge_pos, gpu_pos;
8642 u32 speed_cntl, mask, current_data_rate;
8643 int ret, i;
8644 u16 tmp16;
8645
8646 if (radeon_pcie_gen2 == 0)
8647 return;
8648
8649 if (rdev->flags & RADEON_IS_IGP)
8650 return;
8651
8652 if (!(rdev->flags & RADEON_IS_PCIE))
8653 return;
8654
8655 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8656 if (ret != 0)
8657 return;
8658
8659 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8660 return;
8661
8662 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8663 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8664 LC_CURRENT_DATA_RATE_SHIFT;
8665 if (mask & DRM_PCIE_SPEED_80) {
8666 if (current_data_rate == 2) {
8667 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8668 return;
8669 }
8670 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8671 } else if (mask & DRM_PCIE_SPEED_50) {
8672 if (current_data_rate == 1) {
8673 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8674 return;
8675 }
8676 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8677 }
8678
8679 bridge_pos = pci_pcie_cap(root);
8680 if (!bridge_pos)
8681 return;
8682
8683 gpu_pos = pci_pcie_cap(rdev->pdev);
8684 if (!gpu_pos)
8685 return;
8686
8687 if (mask & DRM_PCIE_SPEED_80) {
8688 /* re-try equalization if gen3 is not already enabled */
8689 if (current_data_rate != 2) {
8690 u16 bridge_cfg, gpu_cfg;
8691 u16 bridge_cfg2, gpu_cfg2;
8692 u32 max_lw, current_lw, tmp;
8693
8694 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8695 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8696
8697 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8698 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8699
8700 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8701 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8702
8703 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8704 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8705 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8706
8707 if (current_lw < max_lw) {
8708 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8709 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8710 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8711 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8712 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8713 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8714 }
8715 }
8716
8717 for (i = 0; i < 10; i++) {
8718 /* check status */
8719 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8720 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8721 break;
8722
8723 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8724 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8725
8726 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8727 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8728
8729 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8730 tmp |= LC_SET_QUIESCE;
8731 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8732
8733 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8734 tmp |= LC_REDO_EQ;
8735 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8736
8737 mdelay(100);
8738
8739 /* linkctl */
8740 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8741 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8742 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8743 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8744
8745 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8746 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8747 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8748 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8749
8750 /* linkctl2 */
8751 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8752 tmp16 &= ~((1 << 4) | (7 << 9));
8753 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8754 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8755
8756 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8757 tmp16 &= ~((1 << 4) | (7 << 9));
8758 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8759 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8760
8761 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8762 tmp &= ~LC_SET_QUIESCE;
8763 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8764 }
8765 }
8766 }
8767
8768 /* set the link speed */
8769 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8770 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8771 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8772
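/* LNKCTL2 bits [3:0] hold the target link speed: 1 = gen1, 2 = gen2, 3 = gen3 */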
8773 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8774 tmp16 &= ~0xf;
8775 if (mask & DRM_PCIE_SPEED_80)
8776 tmp16 |= 3; /* gen3 */
8777 else if (mask & DRM_PCIE_SPEED_50)
8778 tmp16 |= 2; /* gen2 */
8779 else
8780 tmp16 |= 1; /* gen1 */
8781 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8782
8783 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8784 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8785 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8786
8787 for (i = 0; i < rdev->usec_timeout; i++) {
8788 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8789 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8790 break;
8791 udelay(1);
8792 }
8793 }
8794
8795 static void cik_program_aspm(struct radeon_device *rdev)
8796 {
8797 u32 data, orig;
8798 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8799 bool disable_clkreq = false;
8800
8801 if (radeon_aspm == 0)
8802 return;
8803
8804 /* XXX double check IGPs */
8805 if (rdev->flags & RADEON_IS_IGP)
8806 return;
8807
8808 if (!(rdev->flags & RADEON_IS_PCIE))
8809 return;
8810
8811 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8812 data &= ~LC_XMIT_N_FTS_MASK;
8813 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8814 if (orig != data)
8815 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8816
8817 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8818 data |= LC_GO_TO_RECOVERY;
8819 if (orig != data)
8820 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8821
8822 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8823 data |= P_IGNORE_EDB_ERR;
8824 if (orig != data)
8825 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8826
8827 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8828 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8829 data |= LC_PMI_TO_L1_DIS;
8830 if (!disable_l0s)
8831 data |= LC_L0S_INACTIVITY(7);
8832
8833 if (!disable_l1) {
8834 data |= LC_L1_INACTIVITY(7);
8835 data &= ~LC_PMI_TO_L1_DIS;
8836 if (orig != data)
8837 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8838
8839 if (!disable_plloff_in_l1) {
8840 bool clk_req_support;
8841
8842 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8843 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8844 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8845 if (orig != data)
8846 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8847
8848 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8849 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8850 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8851 if (orig != data)
8852 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8853
8854 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8855 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8856 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8857 if (orig != data)
8858 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8859
8860 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8861 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8862 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8863 if (orig != data)
8864 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8865
8866 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8867 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8868 data |= LC_DYN_LANES_PWR_STATE(3);
8869 if (orig != data)
8870 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8871
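/* only use CLKREQ# based power savings if the root port advertises Clock PM support */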
8872 if (!disable_clkreq) {
8873 struct pci_dev *root = rdev->pdev->bus->self;
8874 u32 lnkcap;
8875
8876 clk_req_support = false;
8877 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8878 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8879 clk_req_support = true;
8880 } else {
8881 clk_req_support = false;
8882 }
8883
8884 if (clk_req_support) {
8885 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8886 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8887 if (orig != data)
8888 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8889
8890 orig = data = RREG32_SMC(THM_CLK_CNTL);
8891 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8892 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8893 if (orig != data)
8894 WREG32_SMC(THM_CLK_CNTL, data);
8895
8896 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8897 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8898 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8899 if (orig != data)
8900 WREG32_SMC(MISC_CLK_CTRL, data);
8901
8902 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8903 data &= ~BCLK_AS_XCLK;
8904 if (orig != data)
8905 WREG32_SMC(CG_CLKPIN_CNTL, data);
8906
8907 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8908 data &= ~FORCE_BIF_REFCLK_EN;
8909 if (orig != data)
8910 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8911
8912 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8913 data &= ~MPLL_CLKOUT_SEL_MASK;
8914 data |= MPLL_CLKOUT_SEL(4);
8915 if (orig != data)
8916 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8917 }
8918 }
8919 } else {
8920 if (orig != data)
8921 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8922 }
8923
8924 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8925 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8926 if (orig != data)
8927 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8928
8929 if (!disable_l0s) {
8930 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8931 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8932 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8933 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8934 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8935 data &= ~LC_L0S_INACTIVITY_MASK;
8936 if (orig != data)
8937 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8938 }
8939 }
8940 }
8941 }