/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"

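/*
 * MODULE_FIRMWARE() only records the blob names in the module metadata so
 * that userspace tooling (e.g. initramfs generators) knows what to bundle;
 * the actual loading happens at runtime through request_firmware() in
 * cik_init_microcode() below.
 */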
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}

/*
 * Indirect registers accessor
 */
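/*
 * PCIE port registers sit behind an index/data pair: the dword offset goes
 * into PCIE_INDEX and the payload moves through PCIE_DATA.  The throwaway
 * (void)RREG32() after each write reads the register back to flush the
 * posted write, so the index (or data) has landed before the next access.
 */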
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}

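/*
 * RLC save/restore register lists.  Each register entry packs a GRBM
 * instance/broadcast selector in its upper 16 bits and a dword register
 * offset in its lower 16 bits, and is followed by a slot for the saved
 * value; the bare 0x3/0x5 words near the end appear to introduce the
 * indirect-register blocks.  (This description of the encoding is an
 * editorial reading, not taken from hardware documentation; the lists are
 * consumed by the RLC setup code, see sumo_rlc_init().)
 */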
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

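/*
 * The golden register tables below are triples of { register offset,
 * and_mask, or_mask } consumed by radeon_program_register_sequence():
 * bits in and_mask are cleared and or_mask is OR'd in, except that an
 * and_mask of 0xffffffff writes or_mask verbatim.
 */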
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28355, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	default:
		break;
	}
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}

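/*
 * On CIK the compute (MEC) rings kick the hardware by writing their ring
 * write pointer through this doorbell aperture (see the callers of
 * cik_mm_wdoorbell()) rather than through a dedicated wptr register.
 */
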
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};

/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe
 * @queue: queue
 * @vmid: VMID
 *
 * Switches the currently active register instances. Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
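/*
 * Typical usage (a sketch mirroring the compute setup paths in this file):
 * select the target instance, program the per-queue registers, then switch
 * back to instance 0.  Callers serialize on rdev->srbm_mutex since
 * SRBM_GFX_CNTL is global state:
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program HQD/queue registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */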

/* ucode loading */
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only load the ucode if the MC engine isn't already running */
	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
		}
		/* load the MC ucode */
		fw_data = (const __be32 *)rdev->mc_fw->data;
		for (i = 0; i < ucode_size; i++)
			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size, smc_req_size;
	char fw_name[30];
	int err;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default: BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
	}

	/* No SMC, MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
		}

		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
		if (err) {
			printk(KERN_ERR
			       "smc: error loading firmware \"%s\"\n",
			       fw_name);
			release_firmware(rdev->smc_fw);
			rdev->smc_fw = NULL;
			/* SMC ucode is optional; carry on without dpm support */
			err = 0;
		} else if (rdev->smc_fw->size != smc_req_size) {
			printk(KERN_ERR
			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->smc_fw->size, fw_name);
			err = -EINVAL;
		}
	}

out:
	if (err) {
		if (err != -EINVAL)
			printk(KERN_ERR
			       "cik_cp: Failed to load firmware \"%s\"\n",
			       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
		release_firmware(rdev->smc_fw);
		rdev->smc_fw = NULL;
	}
	return err;
}

/*
 * Core functions
 */
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes. Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
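/*
 * Each GB_TILE_MODE entry packs ARRAY_MODE, PIPE_CONFIG, TILE_SPLIT and
 * the micro tile mode into a single dword.  The computed table is also
 * cached in rdev->config.cik.tile_mode_array[] so userspace can query it
 * (RADEON_INFO_CIK_TILE_MODE_ARRAY) and pick an index when allocating
 * surfaces.
 */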
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */

	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
1970 case 0:
1971 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1972 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1973 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1974 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1975 break;
1976 case 1:
1977 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1978 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1979 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1980 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1981 break;
1982 case 2:
1983 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1984 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1985 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1986 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1987 break;
1988 case 3:
1989 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1990 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1991 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1992 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1993 break;
1994 case 4:
1995 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1996 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1997 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1998 TILE_SPLIT(split_equal_to_row_size));
1999 break;
2000 case 5:
2001 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2002 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2003 break;
2004 case 6:
2005 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2006 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2007 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2008 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2009 break;
2010 case 7:
2011 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2012 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2013 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2014 TILE_SPLIT(split_equal_to_row_size));
2015 break;
2016 case 8:
2017 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2018 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2019 break;
2020 case 9:
2021 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2022 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2023 break;
2024 case 10:
2025 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2026 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2027 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2028 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2029 break;
2030 case 11:
2031 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2032 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2033 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2034 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2035 break;
2036 case 12:
2037 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2038 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2039 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2040 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2041 break;
2042 case 13:
2043 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2044 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2045 break;
2046 case 14:
2047 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2049 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2050 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2051 break;
2052 case 16:
2053 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2055 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2056 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2057 break;
2058 case 17:
2059 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2060 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2061 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2062 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2063 break;
2064 case 27:
2065 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2067 break;
2068 case 28:
2069 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2070 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2071 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 break;
2074 case 29:
2075 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2076 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2077 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2078 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2079 break;
2080 case 30:
2081 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2083 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2085 break;
2086 default:
2087 gb_tile_moden = 0;
2088 break;
2089 }
2090 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2091 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2092 }
2093 } else if (num_rbs < 4) {
2094 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2095 switch (reg_offset) {
2096 case 0:
2097 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2098 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2099 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2100 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2101 break;
2102 case 1:
2103 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2104 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2105 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2107 break;
2108 case 2:
2109 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2110 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2111 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2112 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2113 break;
2114 case 3:
2115 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2116 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2117 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2119 break;
2120 case 4:
2121 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2122 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2123 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2124 TILE_SPLIT(split_equal_to_row_size));
2125 break;
2126 case 5:
2127 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2128 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2129 break;
2130 case 6:
2131 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2132 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2133 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2134 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2135 break;
2136 case 7:
2137 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2138 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2139 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2140 TILE_SPLIT(split_equal_to_row_size));
2141 break;
2142 case 8:
2143 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2144 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2145 break;
2146 case 9:
2147 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2148 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2149 break;
2150 case 10:
2151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2153 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2154 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2155 break;
2156 case 11:
2157 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2159 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2160 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 break;
2162 case 12:
2163 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2164 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2165 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2166 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 break;
2168 case 13:
2169 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2171 break;
2172 case 14:
2173 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2175 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2176 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2177 break;
2178 case 16:
2179 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2180 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2181 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2182 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2183 break;
2184 case 17:
2185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2186 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2187 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2189 break;
2190 case 27:
2191 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2192 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2193 break;
2194 case 28:
2195 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2196 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2197 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2198 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2199 break;
2200 case 29:
2201 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2203 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2205 break;
2206 case 30:
2207 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2208 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2209 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2210 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2211 break;
2212 default:
2213 gb_tile_moden = 0;
2214 break;
2215 }
2216 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2217 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2218 }
2219 }
2220 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2221 switch (reg_offset) {
2222 case 0:
2223 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2226 NUM_BANKS(ADDR_SURF_16_BANK));
2227 break;
2228 case 1:
2229 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2230 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2231 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2232 NUM_BANKS(ADDR_SURF_16_BANK));
2233 break;
2234 case 2:
2235 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2238 NUM_BANKS(ADDR_SURF_16_BANK));
2239 break;
2240 case 3:
2241 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2242 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2243 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2244 NUM_BANKS(ADDR_SURF_16_BANK));
2245 break;
2246 case 4:
2247 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2250 NUM_BANKS(ADDR_SURF_16_BANK));
2251 break;
2252 case 5:
2253 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2254 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2255 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2256 NUM_BANKS(ADDR_SURF_8_BANK));
2257 break;
2258 case 6:
2259 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2260 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2261 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2262 NUM_BANKS(ADDR_SURF_4_BANK));
2263 break;
2264 case 8:
2265 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2266 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2267 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2268 NUM_BANKS(ADDR_SURF_16_BANK));
2269 break;
2270 case 9:
2271 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2272 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2273 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2274 NUM_BANKS(ADDR_SURF_16_BANK));
2275 break;
2276 case 10:
2277 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2278 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2279 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2280 NUM_BANKS(ADDR_SURF_16_BANK));
2281 break;
2282 case 11:
2283 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2285 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2286 NUM_BANKS(ADDR_SURF_16_BANK));
2287 break;
2288 case 12:
2289 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2290 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2291 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2292 NUM_BANKS(ADDR_SURF_16_BANK));
2293 break;
2294 case 13:
2295 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2296 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2297 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2298 NUM_BANKS(ADDR_SURF_8_BANK));
2299 break;
2300 case 14:
2301 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2302 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2303 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2304 NUM_BANKS(ADDR_SURF_4_BANK));
2305 break;
2306 default:
2307 gb_tile_moden = 0;
2308 break;
2309 }
2310 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2311 }
2312 } else if (num_pipe_configs == 2) {
2313 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2314 switch (reg_offset) {
2315 case 0:
2316 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2318 PIPE_CONFIG(ADDR_SURF_P2) |
2319 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2320 break;
2321 case 1:
2322 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2324 PIPE_CONFIG(ADDR_SURF_P2) |
2325 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2326 break;
2327 case 2:
2328 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 PIPE_CONFIG(ADDR_SURF_P2) |
2331 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2332 break;
2333 case 3:
2334 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2336 PIPE_CONFIG(ADDR_SURF_P2) |
2337 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2338 break;
2339 case 4:
2340 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2342 PIPE_CONFIG(ADDR_SURF_P2) |
2343 TILE_SPLIT(split_equal_to_row_size));
2344 break;
2345 case 5:
2346 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2347 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2348 break;
2349 case 6:
2350 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2351 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2352 PIPE_CONFIG(ADDR_SURF_P2) |
2353 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2354 break;
2355 case 7:
2356 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2357 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2358 PIPE_CONFIG(ADDR_SURF_P2) |
2359 TILE_SPLIT(split_equal_to_row_size));
2360 break;
2361 case 8:
2362 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2363 break;
2364 case 9:
2365 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2367 break;
2368 case 10:
2369 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2371 PIPE_CONFIG(ADDR_SURF_P2) |
2372 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2373 break;
2374 case 11:
2375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2376 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2377 PIPE_CONFIG(ADDR_SURF_P2) |
2378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2379 break;
2380 case 12:
2381 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2383 PIPE_CONFIG(ADDR_SURF_P2) |
2384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2385 break;
2386 case 13:
2387 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2388 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2389 break;
2390 case 14:
2391 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2392 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 PIPE_CONFIG(ADDR_SURF_P2) |
2394 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 break;
2396 case 16:
2397 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2399 PIPE_CONFIG(ADDR_SURF_P2) |
2400 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2401 break;
2402 case 17:
2403 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2404 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2405 PIPE_CONFIG(ADDR_SURF_P2) |
2406 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2407 break;
2408 case 27:
2409 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2411 break;
2412 case 28:
2413 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2415 PIPE_CONFIG(ADDR_SURF_P2) |
2416 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2417 break;
2418 case 29:
2419 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2420 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2421 PIPE_CONFIG(ADDR_SURF_P2) |
2422 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2423 break;
2424 case 30:
2425 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427 PIPE_CONFIG(ADDR_SURF_P2) |
2428 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2429 break;
2430 default:
2431 gb_tile_moden = 0;
2432 break;
2433 }
2434 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2435 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2436 }
2437 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2438 switch (reg_offset) {
2439 case 0:
2440 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2441 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2442 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2443 NUM_BANKS(ADDR_SURF_16_BANK));
2444 break;
2445 case 1:
2446 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2447 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2448 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2449 NUM_BANKS(ADDR_SURF_16_BANK));
2450 break;
2451 case 2:
2452 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2453 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2454 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2455 NUM_BANKS(ADDR_SURF_16_BANK));
2456 break;
2457 case 3:
2458 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2459 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2460 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2461 NUM_BANKS(ADDR_SURF_16_BANK));
2462 break;
2463 case 4:
2464 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2465 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2466 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2467 NUM_BANKS(ADDR_SURF_16_BANK));
2468 break;
2469 case 5:
2470 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2471 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2472 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2473 NUM_BANKS(ADDR_SURF_16_BANK));
2474 break;
2475 case 6:
2476 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2477 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2478 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2479 NUM_BANKS(ADDR_SURF_8_BANK));
2480 break;
2481 case 8:
2482 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2483 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2484 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2485 NUM_BANKS(ADDR_SURF_16_BANK));
2486 break;
2487 case 9:
2488 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2489 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2490 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2491 NUM_BANKS(ADDR_SURF_16_BANK));
2492 break;
2493 case 10:
2494 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2495 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2496 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2497 NUM_BANKS(ADDR_SURF_16_BANK));
2498 break;
2499 case 11:
2500 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2501 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2502 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2503 NUM_BANKS(ADDR_SURF_16_BANK));
2504 break;
2505 case 12:
2506 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2508 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2509 NUM_BANKS(ADDR_SURF_16_BANK));
2510 break;
2511 case 13:
2512 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2514 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2515 NUM_BANKS(ADDR_SURF_16_BANK));
2516 break;
2517 case 14:
2518 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2519 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2520 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2521 NUM_BANKS(ADDR_SURF_8_BANK));
2522 break;
2523 default:
2524 gb_tile_moden = 0;
2525 break;
2526 }
2527 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2528 }
2529 } else
2530 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2531 }
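
/*
 * For example (illustrative): each GB_TILE_MODE dword built above lands
 * in its own register, tile mode N at GB_TILE_MODE0 + N * 4, e.g.
 *
 *   WREG32(GB_TILE_MODE0 + (10 * 4), gb_tile_moden);
 *
 * and the same value is cached in rdev->config.cik.tile_mode_array[N],
 * presumably so the CS checker and userspace tiling queries can read
 * the table without register accesses, by analogy with older asics.
 */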
2532
2533 /**
2534 * cik_select_se_sh - select which SE, SH to address
2535 *
2536 * @rdev: radeon_device pointer
2537 * @se_num: shader engine to address
2538 * @sh_num: sh block to address
2539 *
2540 * Select which SE, SH combinations to address. Certain
2541 * registers are instanced per SE or SH. 0xffffffff means
2542 * broadcast to all SEs or SHs (CIK).
2543 */
2544 static void cik_select_se_sh(struct radeon_device *rdev,
2545 u32 se_num, u32 sh_num)
2546 {
2547 u32 data = INSTANCE_BROADCAST_WRITES;
2548
2549 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2550 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2551 else if (se_num == 0xffffffff)
2552 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2553 else if (sh_num == 0xffffffff)
2554 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2555 else
2556 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2557 WREG32(GRBM_GFX_INDEX, data);
2558 }
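
/*
 * For example (illustrative, assuming the cikd.h field encodings),
 * the GRBM_GFX_INDEX values composed by the function above:
 *
 *   cik_select_se_sh(rdev, 1, 0);
 *       GRBM_GFX_INDEX = INSTANCE_BROADCAST_WRITES |
 *                        SH_INDEX(0) | SE_INDEX(1)
 *
 *   cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *       GRBM_GFX_INDEX = INSTANCE_BROADCAST_WRITES |
 *                        SH_BROADCAST_WRITES | SE_BROADCAST_WRITES
 *
 * Callers are expected to restore broadcast mode when done so that
 * later register writes reach every SE/SH again.
 */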
2559
2560 /**
2561 * cik_create_bitmask - create a bitmask
2562 *
2563 * @bit_width: length of the mask
2564 *
2565 * create a variable length bit mask (CIK).
2566 * Returns the bitmask.
2567 */
2568 static u32 cik_create_bitmask(u32 bit_width)
2569 {
2570 u32 i, mask = 0;
2571
2572 for (i = 0; i < bit_width; i++) {
2573 mask <<= 1;
2574 mask |= 1;
2575 }
2576 return mask;
2577 }
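
/*
 * For example (illustrative): cik_create_bitmask(2) returns 0x3 and
 * cik_create_bitmask(4) returns 0xf; for bit_width < 32 the loop is
 * equivalent to
 *
 *   mask = (1 << bit_width) - 1;
 */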
2578
2579 /**
2580 * cik_get_rb_disabled - get the bitmask of disabled RBs
2581 *
2582 * @rdev: radeon_device pointer
2583 * @max_rb_num: max RBs (render backends) for the asic
2584 * @se_num: number of SEs (shader engines) for the asic
2585 * @sh_per_se: number of SH blocks per SE for the asic
2586 *
2587 * Calculates the bitmask of disabled RBs (CIK).
2588 * Returns the disabled RB bitmask.
2589 */
2590 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2591 u32 max_rb_num, u32 se_num,
2592 u32 sh_per_se)
2593 {
2594 u32 data, mask;
2595
2596 data = RREG32(CC_RB_BACKEND_DISABLE);
2597 if (data & 1)
2598 data &= BACKEND_DISABLE_MASK;
2599 else
2600 data = 0;
2601 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2602
2603 data >>= BACKEND_DISABLE_SHIFT;
2604
2605 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2606
2607 return data & mask;
2608 }
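
/*
 * Worked example (illustrative): for a part with max_rb_num = 4,
 * se_num = 2 and sh_per_se = 1, the mask covers 4 / 2 / 1 = 2 RBs
 * (mask = 0x3). If the currently selected SE/SH reports RB 1 as
 * harvested, the function returns 0x2 for that selection.
 */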
2609
2610 /**
2611 * cik_setup_rb - setup the RBs on the asic
2612 *
2613 * @rdev: radeon_device pointer
2614 * @se_num: number of SEs (shader engines) for the asic
2615 * @sh_per_se: number of SH blocks per SE for the asic
2616 * @max_rb_num: max RBs (render backends) for the asic
2617 *
2618 * Configures per-SE/SH RB registers (CIK).
2619 */
2620 static void cik_setup_rb(struct radeon_device *rdev,
2621 u32 se_num, u32 sh_per_se,
2622 u32 max_rb_num)
2623 {
2624 int i, j;
2625 u32 data, mask;
2626 u32 disabled_rbs = 0;
2627 u32 enabled_rbs = 0;
2628
2629 for (i = 0; i < se_num; i++) {
2630 for (j = 0; j < sh_per_se; j++) {
2631 cik_select_se_sh(rdev, i, j);
2632 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2633 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2634 }
2635 }
2636 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2637
2638 mask = 1;
2639 for (i = 0; i < max_rb_num; i++) {
2640 if (!(disabled_rbs & mask))
2641 enabled_rbs |= mask;
2642 mask <<= 1;
2643 }
2644
2645 for (i = 0; i < se_num; i++) {
2646 cik_select_se_sh(rdev, i, 0xffffffff);
2647 data = 0;
2648 for (j = 0; j < sh_per_se; j++) {
2649 switch (enabled_rbs & 3) {
2650 case 1:
2651 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2652 break;
2653 case 2:
2654 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2655 break;
2656 case 3:
2657 default:
2658 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2659 break;
2660 }
2661 enabled_rbs >>= 2;
2662 }
2663 WREG32(PA_SC_RASTER_CONFIG, data);
2664 }
2665 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2666 }
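
/*
 * Worked example (illustrative): with max_rb_num = 4 and
 * disabled_rbs = 0x4 (RB 2 harvested), the conversion loop above
 * yields enabled_rbs = 0xb. Each SH then consumes two bits of
 * enabled_rbs, and (enabled_rbs & 3) == 3 picks RASTER_CONFIG_RB_MAP_2,
 * == 1 picks RASTER_CONFIG_RB_MAP_0, and so on, as in the switch above.
 */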
2667
2668 /**
2669 * cik_gpu_init - setup the 3D engine
2670 *
2671 * @rdev: radeon_device pointer
2672 *
2673 * Configures the 3D engine and tiling configuration
2674 * registers so that the 3D engine is usable.
2675 */
2676 static void cik_gpu_init(struct radeon_device *rdev)
2677 {
2678 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2679 u32 mc_shared_chmap, mc_arb_ramcfg;
2680 u32 hdp_host_path_cntl;
2681 u32 tmp;
2682 int i, j;
2683
2684 switch (rdev->family) {
2685 case CHIP_BONAIRE:
2686 rdev->config.cik.max_shader_engines = 2;
2687 rdev->config.cik.max_tile_pipes = 4;
2688 rdev->config.cik.max_cu_per_sh = 7;
2689 rdev->config.cik.max_sh_per_se = 1;
2690 rdev->config.cik.max_backends_per_se = 2;
2691 rdev->config.cik.max_texture_channel_caches = 4;
2692 rdev->config.cik.max_gprs = 256;
2693 rdev->config.cik.max_gs_threads = 32;
2694 rdev->config.cik.max_hw_contexts = 8;
2695
2696 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2697 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2698 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2699 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2700 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2701 break;
2702 case CHIP_KAVERI:
2703 /* TODO */
2704 break;
2705 case CHIP_KABINI:
2706 default:
2707 rdev->config.cik.max_shader_engines = 1;
2708 rdev->config.cik.max_tile_pipes = 2;
2709 rdev->config.cik.max_cu_per_sh = 2;
2710 rdev->config.cik.max_sh_per_se = 1;
2711 rdev->config.cik.max_backends_per_se = 1;
2712 rdev->config.cik.max_texture_channel_caches = 2;
2713 rdev->config.cik.max_gprs = 256;
2714 rdev->config.cik.max_gs_threads = 16;
2715 rdev->config.cik.max_hw_contexts = 8;
2716
2717 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2718 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2719 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2720 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2721 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2722 break;
2723 }
2724
2725 /* Initialize HDP */
2726 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2727 WREG32((0x2c14 + j), 0x00000000);
2728 WREG32((0x2c18 + j), 0x00000000);
2729 WREG32((0x2c1c + j), 0x00000000);
2730 WREG32((0x2c20 + j), 0x00000000);
2731 WREG32((0x2c24 + j), 0x00000000);
2732 }
2733
2734 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2735
2736 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2737
2738 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2739 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2740
2741 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2742 rdev->config.cik.mem_max_burst_length_bytes = 256;
2743 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2744 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2745 if (rdev->config.cik.mem_row_size_in_kb > 4)
2746 rdev->config.cik.mem_row_size_in_kb = 4;
2747 /* XXX use MC settings? */
2748 rdev->config.cik.shader_engine_tile_size = 32;
2749 rdev->config.cik.num_gpus = 1;
2750 rdev->config.cik.multi_gpu_tile_size = 64;
2751
2752 /* fix up row size */
2753 gb_addr_config &= ~ROW_SIZE_MASK;
2754 switch (rdev->config.cik.mem_row_size_in_kb) {
2755 case 1:
2756 default:
2757 gb_addr_config |= ROW_SIZE(0);
2758 break;
2759 case 2:
2760 gb_addr_config |= ROW_SIZE(1);
2761 break;
2762 case 4:
2763 gb_addr_config |= ROW_SIZE(2);
2764 break;
2765 }
2766
2767 /* setup tiling info dword. gb_addr_config is not adequate since it does
2768 * not have bank info, so create a custom tiling dword.
2769 * bits 3:0 num_pipes
2770 * bits 7:4 num_banks
2771 * bits 11:8 group_size
2772 * bits 15:12 row_size
2773 */
2774 rdev->config.cik.tile_config = 0;
2775 switch (rdev->config.cik.num_tile_pipes) {
2776 case 1:
2777 rdev->config.cik.tile_config |= (0 << 0);
2778 break;
2779 case 2:
2780 rdev->config.cik.tile_config |= (1 << 0);
2781 break;
2782 case 4:
2783 rdev->config.cik.tile_config |= (2 << 0);
2784 break;
2785 case 8:
2786 default:
2787 /* XXX what about 12? */
2788 rdev->config.cik.tile_config |= (3 << 0);
2789 break;
2790 }
2791 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2792 rdev->config.cik.tile_config |= 1 << 4;
2793 else
2794 rdev->config.cik.tile_config |= 0 << 4;
2795 rdev->config.cik.tile_config |=
2796 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2797 rdev->config.cik.tile_config |=
2798 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
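
/*
 * For example (illustrative): on a 4-pipe Bonaire the dword built
 * above decodes roughly as
 *
 *   bits 3:0   2 (4 pipes)
 *   bits 7:4   1 if MC_ARB_RAMCFG reports extra banks, else 0
 *   bits 11:8  pipe interleave size from GB_ADDR_CONFIG
 *   bits 15:12 row size from GB_ADDR_CONFIG
 *
 * Userspace presumably fetches this dword via the
 * RADEON_INFO_TILING_CONFIG query, as on older radeon asics.
 */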
2799
2800 WREG32(GB_ADDR_CONFIG, gb_addr_config);
2801 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2802 WREG32(DMIF_ADDR_CALC, gb_addr_config);
2803 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2804 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2805 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2806 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2807 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2808
2809 cik_tiling_mode_table_init(rdev);
2810
2811 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2812 rdev->config.cik.max_sh_per_se,
2813 rdev->config.cik.max_backends_per_se);
2814
2815 /* set HW defaults for 3D engine */
2816 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2817
2818 WREG32(SX_DEBUG_1, 0x20);
2819
2820 WREG32(TA_CNTL_AUX, 0x00010000);
2821
2822 tmp = RREG32(SPI_CONFIG_CNTL);
2823 tmp |= 0x03000000;
2824 WREG32(SPI_CONFIG_CNTL, tmp);
2825
2826 WREG32(SQ_CONFIG, 1);
2827
2828 WREG32(DB_DEBUG, 0);
2829
2830 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2831 tmp |= 0x00000400;
2832 WREG32(DB_DEBUG2, tmp);
2833
2834 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2835 tmp |= 0x00020200;
2836 WREG32(DB_DEBUG3, tmp);
2837
2838 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2839 tmp |= 0x00018208;
2840 WREG32(CB_HW_CONTROL, tmp);
2841
2842 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2843
2844 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2845 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2846 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2847 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2848
2849 WREG32(VGT_NUM_INSTANCES, 1);
2850
2851 WREG32(CP_PERFMON_CNTL, 0);
2852
2853 WREG32(SQ_CONFIG, 0);
2854
2855 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2856 FORCE_EOV_MAX_REZ_CNT(255)));
2857
2858 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2859 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2860
2861 WREG32(VGT_GS_VERTEX_REUSE, 16);
2862 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2863
2864 tmp = RREG32(HDP_MISC_CNTL);
2865 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2866 WREG32(HDP_MISC_CNTL, tmp);
2867
2868 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2869 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2870
2871 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2872 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2873
2874 udelay(50);
2875 }
2876
2877 /*
2878 * GPU scratch registers helper functions.
2879 */
2880 /**
2881 * cik_scratch_init - setup driver info for CP scratch regs
2882 *
2883 * @rdev: radeon_device pointer
2884 *
2885 * Set up the number and offset of the CP scratch registers.
2886 * NOTE: use of CP scratch registers is a legacy interface and
2887 * is not used by default on newer asics (r6xx+), where memory
2888 * buffers are used for fences rather than scratch regs.
2889 */
2890 static void cik_scratch_init(struct radeon_device *rdev)
2891 {
2892 int i;
2893
2894 rdev->scratch.num_reg = 7;
2895 rdev->scratch.reg_base = SCRATCH_REG0;
2896 for (i = 0; i < rdev->scratch.num_reg; i++) {
2897 rdev->scratch.free[i] = true;
2898 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2899 }
2900 }
2901
2902 /**
2903 * cik_ring_test - basic gfx ring test
2904 *
2905 * @rdev: radeon_device pointer
2906 * @ring: radeon_ring structure holding ring information
2907 *
2908 * Allocate a scratch register and write to it using the gfx ring (CIK).
2909 * Provides a basic gfx ring test to verify that the ring is working.
2910 * Used by cik_cp_gfx_resume().
2911 * Returns 0 on success, error on failure.
2912 */
2913 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2914 {
2915 uint32_t scratch;
2916 uint32_t tmp = 0;
2917 unsigned i;
2918 int r;
2919
2920 r = radeon_scratch_get(rdev, &scratch);
2921 if (r) {
2922 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2923 return r;
2924 }
2925 WREG32(scratch, 0xCAFEDEAD);
2926 r = radeon_ring_lock(rdev, ring, 3);
2927 if (r) {
2928 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2929 radeon_scratch_free(rdev, scratch);
2930 return r;
2931 }
2932 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2933 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2934 radeon_ring_write(ring, 0xDEADBEEF);
2935 radeon_ring_unlock_commit(rdev, ring);
2936
2937 for (i = 0; i < rdev->usec_timeout; i++) {
2938 tmp = RREG32(scratch);
2939 if (tmp == 0xDEADBEEF)
2940 break;
2941 DRM_UDELAY(1);
2942 }
2943 if (i < rdev->usec_timeout) {
2944 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2945 } else {
2946 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2947 ring->idx, scratch, tmp);
2948 r = -EINVAL;
2949 }
2950 radeon_scratch_free(rdev, scratch);
2951 return r;
2952 }
2953
2954 /**
2955 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2956 *
2957 * @rdev: radeon_device pointer
2958 * @fence: radeon fence object
2959 *
2960 * Emits a fence sequence number on the gfx ring and flushes
2961 * GPU caches.
2962 */
2963 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2964 struct radeon_fence *fence)
2965 {
2966 struct radeon_ring *ring = &rdev->ring[fence->ring];
2967 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2968
2969 /* EVENT_WRITE_EOP - flush caches, send int */
2970 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2971 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2972 EOP_TC_ACTION_EN |
2973 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2974 EVENT_INDEX(5)));
2975 radeon_ring_write(ring, addr & 0xfffffffc);
2976 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2977 radeon_ring_write(ring, fence->seq);
2978 radeon_ring_write(ring, 0);
2979 /* HDP flush */
2980 /* We should be using the new WAIT_REG_MEM special op packet here
2981 * but it causes the CP to hang
2982 */
2983 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2984 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2985 WRITE_DATA_DST_SEL(0)));
2986 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2987 radeon_ring_write(ring, 0);
2988 radeon_ring_write(ring, 0);
2989 }
2990
2991 /**
2992 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2993 *
2994 * @rdev: radeon_device pointer
2995 * @fence: radeon fence object
2996 *
2997 * Emits a fence sequence number on the compute ring and flushes
2998 * GPU caches.
2999 */
3000 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3001 struct radeon_fence *fence)
3002 {
3003 struct radeon_ring *ring = &rdev->ring[fence->ring];
3004 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3005
3006 /* RELEASE_MEM - flush caches, send int */
3007 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3008 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3009 EOP_TC_ACTION_EN |
3010 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3011 EVENT_INDEX(5)));
3012 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3013 radeon_ring_write(ring, addr & 0xfffffffc);
3014 radeon_ring_write(ring, upper_32_bits(addr));
3015 radeon_ring_write(ring, fence->seq);
3016 radeon_ring_write(ring, 0);
3017 /* HDP flush */
3018 /* We should be using the new WAIT_REG_MEM special op packet here
3019 * but it causes the CP to hang
3020 */
3021 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3022 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3023 WRITE_DATA_DST_SEL(0)));
3024 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3025 radeon_ring_write(ring, 0);
3026 radeon_ring_write(ring, 0);
3027 }
3028
3029 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3030 struct radeon_ring *ring,
3031 struct radeon_semaphore *semaphore,
3032 bool emit_wait)
3033 {
3034 uint64_t addr = semaphore->gpu_addr;
3035 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3036
3037 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3038 radeon_ring_write(ring, addr & 0xffffffff);
3039 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3040 }
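
/*
 * For example (illustrative): a signal/wait pair between two rings
 * would be emitted as
 *
 *   cik_semaphore_ring_emit(rdev, ring_a, sem, false);   signal
 *   cik_semaphore_ring_emit(rdev, ring_b, sem, true);    wait
 *
 * with the sel field in the final dword carrying either
 * PACKET3_SEM_SEL_SIGNAL or PACKET3_SEM_SEL_WAIT.
 */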
3041
3042 /*
3043 * IB stuff
3044 */
3045 /**
3046 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3047 *
3048 * @rdev: radeon_device pointer
3049 * @ib: radeon indirect buffer object
3050 *
3051 * Emits a DE (drawing engine) or CE (constant engine) IB
3052 * on the gfx ring. IBs are usually generated by userspace
3053 * acceleration drivers and submitted to the kernel for
3054 * scheduling on the ring. This function schedules the IB
3055 * on the gfx ring for execution by the GPU.
3056 */
3057 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3058 {
3059 struct radeon_ring *ring = &rdev->ring[ib->ring];
3060 u32 header, control = INDIRECT_BUFFER_VALID;
3061
3062 if (ib->is_const_ib) {
3063 /* set switch buffer packet before const IB */
3064 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3065 radeon_ring_write(ring, 0);
3066
3067 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3068 } else {
3069 u32 next_rptr;
3070 if (ring->rptr_save_reg) {
3071 next_rptr = ring->wptr + 3 + 4;
3072 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3073 radeon_ring_write(ring, ((ring->rptr_save_reg -
3074 PACKET3_SET_UCONFIG_REG_START) >> 2));
3075 radeon_ring_write(ring, next_rptr);
3076 } else if (rdev->wb.enabled) {
3077 next_rptr = ring->wptr + 5 + 4;
3078 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3079 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3080 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3081 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3082 radeon_ring_write(ring, next_rptr);
3083 }
3084
3085 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3086 }
3087
3088 control |= ib->length_dw |
3089 (ib->vm ? (ib->vm->id << 24) : 0);
3090
3091 radeon_ring_write(ring, header);
3092 radeon_ring_write(ring,
3093 #ifdef __BIG_ENDIAN
3094 (2 << 0) |
3095 #endif
3096 (ib->gpu_addr & 0xFFFFFFFC));
3097 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3098 radeon_ring_write(ring, control);
3099 }
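
/*
 * For example (illustrative): for a 64-dword DE IB executing in VM 3,
 * the control dword emitted above works out to
 *
 *   control = INDIRECT_BUFFER_VALID | 64 | (3 << 24);
 *
 * i.e. the IB length in dwords in the low bits and the VM id in the
 * high byte (0 when no VM is attached).
 */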
3100
3101 /**
3102 * cik_ib_test - basic gfx ring IB test
3103 *
3104 * @rdev: radeon_device pointer
3105 * @ring: radeon_ring structure holding ring information
3106 *
3107 * Allocate an IB and execute it on the gfx ring (CIK).
3108 * Provides a basic gfx ring test to verify that IBs are working.
3109 * Returns 0 on success, error on failure.
3110 */
3111 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3112 {
3113 struct radeon_ib ib;
3114 uint32_t scratch;
3115 uint32_t tmp = 0;
3116 unsigned i;
3117 int r;
3118
3119 r = radeon_scratch_get(rdev, &scratch);
3120 if (r) {
3121 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3122 return r;
3123 }
3124 WREG32(scratch, 0xCAFEDEAD);
3125 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3126 if (r) {
3127 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3128 return r;
3129 }
3130 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3131 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3132 ib.ptr[2] = 0xDEADBEEF;
3133 ib.length_dw = 3;
3134 r = radeon_ib_schedule(rdev, &ib, NULL);
3135 if (r) {
3136 radeon_scratch_free(rdev, scratch);
3137 radeon_ib_free(rdev, &ib);
3138 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3139 return r;
3140 }
3141 r = radeon_fence_wait(ib.fence, false);
3142 if (r) {
3143 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
/* don't leak the scratch register and IB on the error path */
radeon_scratch_free(rdev, scratch);
radeon_ib_free(rdev, &ib);
3144 return r;
3145 }
3146 for (i = 0; i < rdev->usec_timeout; i++) {
3147 tmp = RREG32(scratch);
3148 if (tmp == 0xDEADBEEF)
3149 break;
3150 DRM_UDELAY(1);
3151 }
3152 if (i < rdev->usec_timeout) {
3153 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3154 } else {
3155 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3156 scratch, tmp);
3157 r = -EINVAL;
3158 }
3159 radeon_scratch_free(rdev, scratch);
3160 radeon_ib_free(rdev, &ib);
3161 return r;
3162 }
3163
3164 /*
3165 * CP.
3166 * On CIK, gfx and compute now have independent command processors.
3167 *
3168 * GFX
3169 * Gfx consists of a single ring and can process both gfx jobs and
3170 * compute jobs. The gfx CP consists of three microengines (ME):
3171 * PFP - Pre-Fetch Parser
3172 * ME - Micro Engine
3173 * CE - Constant Engine
3174 * The PFP and ME make up what is considered the Drawing Engine (DE).
3175 * The CE is an asynchronous engine used for updating buffer descriptors
3176 * used by the DE so that they can be loaded into cache in parallel
3177 * while the DE is processing state update packets.
3178 *
3179 * Compute
3180 * The compute CP consists of two microengines (ME):
3181 * MEC1 - Compute MicroEngine 1
3182 * MEC2 - Compute MicroEngine 2
3183 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
3184 * The queues are exposed to userspace and are programmed directly
3185 * by the compute runtime.
3186 */
3187 /**
3188 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3189 *
3190 * @rdev: radeon_device pointer
3191 * @enable: enable or disable the MEs
3192 *
3193 * Halts or unhalts the gfx MEs.
3194 */
3195 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3196 {
3197 if (enable)
3198 WREG32(CP_ME_CNTL, 0);
3199 else {
3200 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3201 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3202 }
3203 udelay(50);
3204 }
3205
3206 /**
3207 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3208 *
3209 * @rdev: radeon_device pointer
3210 *
3211 * Loads the gfx PFP, ME, and CE ucode.
3212 * Returns 0 for success, -EINVAL if the ucode is not available.
3213 */
3214 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3215 {
3216 const __be32 *fw_data;
3217 int i;
3218
3219 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3220 return -EINVAL;
3221
3222 cik_cp_gfx_enable(rdev, false);
3223
3224 /* PFP */
3225 fw_data = (const __be32 *)rdev->pfp_fw->data;
3226 WREG32(CP_PFP_UCODE_ADDR, 0);
3227 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3228 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3229 WREG32(CP_PFP_UCODE_ADDR, 0);
3230
3231 /* CE */
3232 fw_data = (const __be32 *)rdev->ce_fw->data;
3233 WREG32(CP_CE_UCODE_ADDR, 0);
3234 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3235 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3236 WREG32(CP_CE_UCODE_ADDR, 0);
3237
3238 /* ME */
3239 fw_data = (const __be32 *)rdev->me_fw->data;
3240 WREG32(CP_ME_RAM_WADDR, 0);
3241 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3242 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3243 WREG32(CP_ME_RAM_WADDR, 0);
3244
3245 WREG32(CP_PFP_UCODE_ADDR, 0);
3246 WREG32(CP_CE_UCODE_ADDR, 0);
3247 WREG32(CP_ME_RAM_WADDR, 0);
3248 WREG32(CP_ME_RAM_RADDR, 0);
3249 return 0;
3250 }
3251
3252 /**
3253 * cik_cp_gfx_start - start the gfx ring
3254 *
3255 * @rdev: radeon_device pointer
3256 *
3257 * Enables the ring and loads the clear state context and other
3258 * packets required to init the ring.
3259 * Returns 0 for success, error for failure.
3260 */
3261 static int cik_cp_gfx_start(struct radeon_device *rdev)
3262 {
3263 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3264 int r, i;
3265
3266 /* init the CP */
3267 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3268 WREG32(CP_ENDIAN_SWAP, 0);
3269 WREG32(CP_DEVICE_ID, 1);
3270
3271 cik_cp_gfx_enable(rdev, true);
3272
3273 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3274 if (r) {
3275 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3276 return r;
3277 }
3278
3279 /* init the CE partitions. CE only used for gfx on CIK */
3280 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3281 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3282 radeon_ring_write(ring, 0xc000);
3283 radeon_ring_write(ring, 0xc000);
3284
3285 /* setup clear context state */
3286 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3287 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3288
3289 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3290 radeon_ring_write(ring, 0x80000000);
3291 radeon_ring_write(ring, 0x80000000);
3292
3293 for (i = 0; i < cik_default_size; i++)
3294 radeon_ring_write(ring, cik_default_state[i]);
3295
3296 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3297 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3298
3299 /* set clear context state */
3300 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3301 radeon_ring_write(ring, 0);
3302
3303 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3304 radeon_ring_write(ring, 0x00000316);
3305 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3306 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3307
3308 radeon_ring_unlock_commit(rdev, ring);
3309
3310 return 0;
3311 }
3312
3313 /**
3314 * cik_cp_gfx_fini - stop the gfx ring
3315 *
3316 * @rdev: radeon_device pointer
3317 *
3318 * Stop the gfx ring and tear down the driver ring
3319 * info.
3320 */
3321 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3322 {
3323 cik_cp_gfx_enable(rdev, false);
3324 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3325 }
3326
3327 /**
3328 * cik_cp_gfx_resume - setup the gfx ring buffer registers
3329 *
3330 * @rdev: radeon_device pointer
3331 *
3332 * Program the location and size of the gfx ring buffer
3333 * and test it to make sure it's working.
3334 * Returns 0 for success, error for failure.
3335 */
3336 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3337 {
3338 struct radeon_ring *ring;
3339 u32 tmp;
3340 u32 rb_bufsz;
3341 u64 rb_addr;
3342 int r;
3343
3344 WREG32(CP_SEM_WAIT_TIMER, 0x0);
3345 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3346
3347 /* Set the write pointer delay */
3348 WREG32(CP_RB_WPTR_DELAY, 0);
3349
3350 /* set the RB to use vmid 0 */
3351 WREG32(CP_RB_VMID, 0);
3352
3353 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3354
3355 /* ring 0 - compute and gfx */
3356 /* Set ring buffer size */
3357 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3358 rb_bufsz = drm_order(ring->ring_size / 8);
3359 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3360 #ifdef __BIG_ENDIAN
3361 tmp |= BUF_SWAP_32BIT;
3362 #endif
3363 WREG32(CP_RB0_CNTL, tmp);
3364
3365 /* Initialize the ring buffer's read and write pointers */
3366 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3367 ring->wptr = 0;
3368 WREG32(CP_RB0_WPTR, ring->wptr);
3369
3370 /* set the wb address whether it's enabled or not */
3371 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3372 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3373
3374 /* scratch register shadowing is no longer supported */
3375 WREG32(SCRATCH_UMSK, 0);
3376
3377 if (!rdev->wb.enabled)
3378 tmp |= RB_NO_UPDATE;
3379
3380 mdelay(1);
3381 WREG32(CP_RB0_CNTL, tmp);
3382
3383 rb_addr = ring->gpu_addr >> 8;
3384 WREG32(CP_RB0_BASE, rb_addr);
3385 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3386
3387 ring->rptr = RREG32(CP_RB0_RPTR);
3388
3389 /* start the ring */
3390 cik_cp_gfx_start(rdev);
3391 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3392 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3393 if (r) {
3394 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3395 return r;
3396 }
3397 return 0;
3398 }
3399
3400 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3401 struct radeon_ring *ring)
3402 {
3403 u32 rptr;
3404
3407 if (rdev->wb.enabled) {
3408 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3409 } else {
3410 mutex_lock(&rdev->srbm_mutex);
3411 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3412 rptr = RREG32(CP_HQD_PQ_RPTR);
3413 cik_srbm_select(rdev, 0, 0, 0, 0);
3414 mutex_unlock(&rdev->srbm_mutex);
3415 }
3416
3417 return rptr;
3418 }
3419
3420 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3421 struct radeon_ring *ring)
3422 {
3423 u32 wptr;
3424
3425 if (rdev->wb.enabled) {
3426 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3427 } else {
3428 mutex_lock(&rdev->srbm_mutex);
3429 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3430 wptr = RREG32(CP_HQD_PQ_WPTR);
3431 cik_srbm_select(rdev, 0, 0, 0, 0);
3432 mutex_unlock(&rdev->srbm_mutex);
3433 }
3434
3435 return wptr;
3436 }
3437
3438 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3439 struct radeon_ring *ring)
3440 {
3441 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3442 WDOORBELL32(ring->doorbell_offset, ring->wptr);
3443 }
3444
3445 /**
3446 * cik_cp_compute_enable - enable/disable the compute CP MEs
3447 *
3448 * @rdev: radeon_device pointer
3449 * @enable: enable or disable the MEs
3450 *
3451 * Halts or unhalts the compute MEs.
3452 */
3453 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3454 {
3455 if (enable)
3456 WREG32(CP_MEC_CNTL, 0);
3457 else
3458 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3459 udelay(50);
3460 }
3461
3462 /**
3463 * cik_cp_compute_load_microcode - load the compute CP ME ucode
3464 *
3465 * @rdev: radeon_device pointer
3466 *
3467 * Loads the compute MEC1&2 ucode.
3468 * Returns 0 for success, -EINVAL if the ucode is not available.
3469 */
3470 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3471 {
3472 const __be32 *fw_data;
3473 int i;
3474
3475 if (!rdev->mec_fw)
3476 return -EINVAL;
3477
3478 cik_cp_compute_enable(rdev, false);
3479
3480 /* MEC1 */
3481 fw_data = (const __be32 *)rdev->mec_fw->data;
3482 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3483 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3484 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3485 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3486
3487 if (rdev->family == CHIP_KAVERI) {
3488 /* MEC2 */
3489 fw_data = (const __be32 *)rdev->mec_fw->data;
3490 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3491 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3492 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3493 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3494 }
3495
3496 return 0;
3497 }
3498
3499 /**
3500 * cik_cp_compute_start - start the compute queues
3501 *
3502 * @rdev: radeon_device pointer
3503 *
3504 * Enable the compute queues.
3505 * Returns 0 for success, error for failure.
3506 */
3507 static int cik_cp_compute_start(struct radeon_device *rdev)
3508 {
3509 cik_cp_compute_enable(rdev, true);
3510
3511 return 0;
3512 }
3513
3514 /**
3515 * cik_cp_compute_fini - stop the compute queues
3516 *
3517 * @rdev: radeon_device pointer
3518 *
3519 * Stop the compute queues and tear down the driver queue
3520 * info.
3521 */
3522 static void cik_cp_compute_fini(struct radeon_device *rdev)
3523 {
3524 int i, idx, r;
3525
3526 cik_cp_compute_enable(rdev, false);
3527
3528 for (i = 0; i < 2; i++) {
3529 if (i == 0)
3530 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3531 else
3532 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3533
3534 if (rdev->ring[idx].mqd_obj) {
3535 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3536 if (unlikely(r != 0))
3537 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3538
3539 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3540 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3541
3542 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3543 rdev->ring[idx].mqd_obj = NULL;
3544 }
3545 }
3546 }
3547
3548 static void cik_mec_fini(struct radeon_device *rdev)
3549 {
3550 int r;
3551
3552 if (rdev->mec.hpd_eop_obj) {
3553 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3554 if (unlikely(r != 0))
3555 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3556 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3557 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3558
3559 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3560 rdev->mec.hpd_eop_obj = NULL;
3561 }
3562 }
3563
3564 #define MEC_HPD_SIZE 2048
3565
3566 static int cik_mec_init(struct radeon_device *rdev)
3567 {
3568 int r;
3569 u32 *hpd;
3570
3571 /*
3572 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3573 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3574 */
3575 if (rdev->family == CHIP_KAVERI)
3576 rdev->mec.num_mec = 2;
3577 else
3578 rdev->mec.num_mec = 1;
3579 rdev->mec.num_pipe = 4;
3580 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
3581
3582 if (rdev->mec.hpd_eop_obj == NULL) {
3583 r = radeon_bo_create(rdev,
3584 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3585 PAGE_SIZE, true,
3586 RADEON_GEM_DOMAIN_GTT, NULL,
3587 &rdev->mec.hpd_eop_obj);
3588 if (r) {
3589 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3590 return r;
3591 }
3592 }
3593
3594 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3595 if (unlikely(r != 0)) {
3596 cik_mec_fini(rdev);
3597 return r;
3598 }
3599 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3600 &rdev->mec.hpd_eop_gpu_addr);
3601 if (r) {
3602 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3603 cik_mec_fini(rdev);
3604 return r;
3605 }
3606 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3607 if (r) {
3608 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3609 cik_mec_fini(rdev);
3610 return r;
3611 }
3612
3613 /* clear memory; it's unclear whether the hw requires this, so stay safe */
3614 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3615
3616 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3617 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3618
3619 return 0;
3620 }
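/*
 * Worked example of the HPD EOP buffer sizing above: on Kaveri this is
 * 2 MECs * 4 pipes * MEC_HPD_SIZE (2048) * 2 = 32 KiB; on Bonaire/Kabini
 * it is half that (16 KiB), one MEC_HPD_SIZE * 2 slot per pipe.
 */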
3621
3622 struct hqd_registers {
3624 u32 cp_mqd_base_addr;
3625 u32 cp_mqd_base_addr_hi;
3626 u32 cp_hqd_active;
3627 u32 cp_hqd_vmid;
3628 u32 cp_hqd_persistent_state;
3629 u32 cp_hqd_pipe_priority;
3630 u32 cp_hqd_queue_priority;
3631 u32 cp_hqd_quantum;
3632 u32 cp_hqd_pq_base;
3633 u32 cp_hqd_pq_base_hi;
3634 u32 cp_hqd_pq_rptr;
3635 u32 cp_hqd_pq_rptr_report_addr;
3636 u32 cp_hqd_pq_rptr_report_addr_hi;
3637 u32 cp_hqd_pq_wptr_poll_addr;
3638 u32 cp_hqd_pq_wptr_poll_addr_hi;
3639 u32 cp_hqd_pq_doorbell_control;
3640 u32 cp_hqd_pq_wptr;
3641 u32 cp_hqd_pq_control;
3642 u32 cp_hqd_ib_base_addr;
3643 u32 cp_hqd_ib_base_addr_hi;
3644 u32 cp_hqd_ib_rptr;
3645 u32 cp_hqd_ib_control;
3646 u32 cp_hqd_iq_timer;
3647 u32 cp_hqd_iq_rptr;
3648 u32 cp_hqd_dequeue_request;
3649 u32 cp_hqd_dma_offload;
3650 u32 cp_hqd_sema_cmd;
3651 u32 cp_hqd_msg_type;
3652 u32 cp_hqd_atomic0_preop_lo;
3653 u32 cp_hqd_atomic0_preop_hi;
3654 u32 cp_hqd_atomic1_preop_lo;
3655 u32 cp_hqd_atomic1_preop_hi;
3656 u32 cp_hqd_hq_scheduler0;
3657 u32 cp_hqd_hq_scheduler1;
3658 u32 cp_mqd_control;
3659 };
3660
3661 struct bonaire_mqd {
3663 u32 header;
3664 u32 dispatch_initiator;
3665 u32 dimensions[3];
3666 u32 start_idx[3];
3667 u32 num_threads[3];
3668 u32 pipeline_stat_enable;
3669 u32 perf_counter_enable;
3670 u32 pgm[2];
3671 u32 tba[2];
3672 u32 tma[2];
3673 u32 pgm_rsrc[2];
3674 u32 vmid;
3675 u32 resource_limits;
3676 u32 static_thread_mgmt01[2];
3677 u32 tmp_ring_size;
3678 u32 static_thread_mgmt23[2];
3679 u32 restart[3];
3680 u32 thread_trace_enable;
3681 u32 reserved1;
3682 u32 user_data[16];
3683 u32 vgtcs_invoke_count[2];
3684 struct hqd_registers queue_state;
3685 u32 dequeue_cntr;
3686 u32 interrupt_queue[64];
3687 };
3688
3689 /**
3690 * cik_cp_compute_resume - setup the compute queue registers
3691 *
3692 * @rdev: radeon_device pointer
3693 *
3694 * Program the compute queues and test them to make sure they
3695 * are working.
3696 * Returns 0 for success, error for failure.
3697 */
3698 static int cik_cp_compute_resume(struct radeon_device *rdev)
3699 {
3700 int r, i, j, idx;
3701 u32 tmp;
3702 bool use_doorbell = true;
3703 u64 hqd_gpu_addr;
3704 u64 mqd_gpu_addr;
3705 u64 eop_gpu_addr;
3706 u64 wb_gpu_addr;
3707 u32 *buf;
3708 struct bonaire_mqd *mqd;
3709
3710 r = cik_cp_compute_start(rdev);
3711 if (r)
3712 return r;
3713
3714 /* fix up chicken bits */
3715 tmp = RREG32(CP_CPF_DEBUG);
3716 tmp |= (1 << 23);
3717 WREG32(CP_CPF_DEBUG, tmp);
3718
3719 /* init the pipes */
3720 mutex_lock(&rdev->srbm_mutex);
3721 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3722 int me = (i < 4) ? 1 : 2;
3723 int pipe = (i < 4) ? i : (i - 4);
3724
3725 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3726
3727 cik_srbm_select(rdev, me, pipe, 0, 0);
3728
3729 /* write the EOP addr */
3730 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3731 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3732
3733 /* set the VMID assigned */
3734 WREG32(CP_HPD_EOP_VMID, 0);
3735
3736 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3737 tmp = RREG32(CP_HPD_EOP_CONTROL);
3738 tmp &= ~EOP_SIZE_MASK;
3739 tmp |= drm_order(MEC_HPD_SIZE / 8);
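/* e.g. MEC_HPD_SIZE = 2048 bytes = 256 8-byte units; drm_order(256) = 8,
 * so the hw EOP ring is 2^(8+1) = 512 dwords = 2048 bytes */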
3740 WREG32(CP_HPD_EOP_CONTROL, tmp);
3741 }
3742 cik_srbm_select(rdev, 0, 0, 0, 0);
3743 mutex_unlock(&rdev->srbm_mutex);
3744
3745 /* init the queues. Just two for now. */
3746 for (i = 0; i < 2; i++) {
3747 if (i == 0)
3748 idx = CAYMAN_RING_TYPE_CP1_INDEX;
3749 else
3750 idx = CAYMAN_RING_TYPE_CP2_INDEX;
3751
3752 if (rdev->ring[idx].mqd_obj == NULL) {
3753 r = radeon_bo_create(rdev,
3754 sizeof(struct bonaire_mqd),
3755 PAGE_SIZE, true,
3756 RADEON_GEM_DOMAIN_GTT, NULL,
3757 &rdev->ring[idx].mqd_obj);
3758 if (r) {
3759 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3760 return r;
3761 }
3762 }
3763
3764 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3765 if (unlikely(r != 0)) {
3766 cik_cp_compute_fini(rdev);
3767 return r;
3768 }
3769 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3770 &mqd_gpu_addr);
3771 if (r) {
3772 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3773 cik_cp_compute_fini(rdev);
3774 return r;
3775 }
3776 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3777 if (r) {
3778 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3779 cik_cp_compute_fini(rdev);
3780 return r;
3781 }
3782
3783 /* doorbell offset */
3784 rdev->ring[idx].doorbell_offset =
3785 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3786
3787 /* init the mqd struct */
3788 memset(buf, 0, sizeof(struct bonaire_mqd));
3789
3790 mqd = (struct bonaire_mqd *)buf;
3791 mqd->header = 0xC0310800;
3792 mqd->static_thread_mgmt01[0] = 0xffffffff;
3793 mqd->static_thread_mgmt01[1] = 0xffffffff;
3794 mqd->static_thread_mgmt23[0] = 0xffffffff;
3795 mqd->static_thread_mgmt23[1] = 0xffffffff;
3796
3797 mutex_lock(&rdev->srbm_mutex);
3798 cik_srbm_select(rdev, rdev->ring[idx].me,
3799 rdev->ring[idx].pipe,
3800 rdev->ring[idx].queue, 0);
3801
3802 /* disable wptr polling */
3803 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3804 tmp &= ~WPTR_POLL_EN;
3805 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3806
3807 /* enable doorbell? */
3808 mqd->queue_state.cp_hqd_pq_doorbell_control =
3809 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3810 if (use_doorbell)
3811 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3812 else
3813 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3814 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3815 mqd->queue_state.cp_hqd_pq_doorbell_control);
3816
3817 /* disable the queue if it's active */
3818 mqd->queue_state.cp_hqd_dequeue_request = 0;
3819 mqd->queue_state.cp_hqd_pq_rptr = 0;
3820 mqd->queue_state.cp_hqd_pq_wptr = 0;
3821 if (RREG32(CP_HQD_ACTIVE) & 1) {
3822 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3823 for (j = 0; j < rdev->usec_timeout; j++) {
3824 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3825 break;
3826 udelay(1);
3827 }
3828 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3829 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3830 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3831 }
3832
3833 /* set the pointer to the MQD */
3834 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3835 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3836 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3837 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3838 /* set MQD vmid to 0 */
3839 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3840 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3841 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3842
3843 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3844 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3845 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3846 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3847 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3848 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3849
3850 /* set up the HQD, this is similar to CP_RB0_CNTL */
3851 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3852 mqd->queue_state.cp_hqd_pq_control &=
3853 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3854
3855 mqd->queue_state.cp_hqd_pq_control |=
3856 drm_order(rdev->ring[idx].ring_size / 8);
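/* e.g. a 1 MiB ring: drm_order(1048576 / 8) = 17 in the QUEUE_SIZE field */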
3857 mqd->queue_state.cp_hqd_pq_control |=
3858 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3859 #ifdef __BIG_ENDIAN
3860 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3861 #endif
3862 mqd->queue_state.cp_hqd_pq_control &=
3863 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3864 mqd->queue_state.cp_hqd_pq_control |=
3865 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3866 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3867
3868 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3869 if (i == 0)
3870 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3871 else
3872 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3873 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3874 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3875 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3876 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3877 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3878
3879 /* set the wb address whether it's enabled or not */
3880 if (i == 0)
3881 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3882 else
3883 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3884 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3885 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3886 upper_32_bits(wb_gpu_addr) & 0xffff;
3887 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3888 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3889 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3890 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3891
3892 /* enable the doorbell if requested */
3893 if (use_doorbell) {
3894 mqd->queue_state.cp_hqd_pq_doorbell_control =
3895 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3896 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3897 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3898 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3899 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3900 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3901 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3902
3903 } else {
3904 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3905 }
3906 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3907 mqd->queue_state.cp_hqd_pq_doorbell_control);
3908
3909 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3910 rdev->ring[idx].wptr = 0;
3911 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3912 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3913 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3914 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3915
3916 /* set the vmid for the queue */
3917 mqd->queue_state.cp_hqd_vmid = 0;
3918 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3919
3920 /* activate the queue */
3921 mqd->queue_state.cp_hqd_active = 1;
3922 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3923
3924 cik_srbm_select(rdev, 0, 0, 0, 0);
3925 mutex_unlock(&rdev->srbm_mutex);
3926
3927 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3928 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3929
3930 rdev->ring[idx].ready = true;
3931 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3932 if (r)
3933 rdev->ring[idx].ready = false;
3934 }
3935
3936 return 0;
3937 }
3938
3939 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3940 {
3941 cik_cp_gfx_enable(rdev, enable);
3942 cik_cp_compute_enable(rdev, enable);
3943 }
3944
3945 static int cik_cp_load_microcode(struct radeon_device *rdev)
3946 {
3947 int r;
3948
3949 r = cik_cp_gfx_load_microcode(rdev);
3950 if (r)
3951 return r;
3952 r = cik_cp_compute_load_microcode(rdev);
3953 if (r)
3954 return r;
3955
3956 return 0;
3957 }
3958
3959 static void cik_cp_fini(struct radeon_device *rdev)
3960 {
3961 cik_cp_gfx_fini(rdev);
3962 cik_cp_compute_fini(rdev);
3963 }
3964
3965 static int cik_cp_resume(struct radeon_device *rdev)
3966 {
3967 int r;
3968
3969 /* Reset all cp blocks */
3970 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3971 RREG32(GRBM_SOFT_RESET);
3972 mdelay(15);
3973 WREG32(GRBM_SOFT_RESET, 0);
3974 RREG32(GRBM_SOFT_RESET);
3975
3976 r = cik_cp_load_microcode(rdev);
3977 if (r)
3978 return r;
3979
3980 r = cik_cp_gfx_resume(rdev);
3981 if (r)
3982 return r;
3983 r = cik_cp_compute_resume(rdev);
3984 if (r)
3985 return r;
3986
3987 return 0;
3988 }
3989
3990 /*
3991 * sDMA - System DMA
3992 * Starting with CIK, the GPU has new asynchronous
3993 * DMA engines. These engines are used for compute
3994 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3995 * and each one supports 1 ring buffer used for gfx
3996 * and 2 queues used for compute.
3997 *
3998 * The programming model is very similar to the CP
3999 * (ring buffer, IBs, etc.), but sDMA has its own
4000 * packet format that is different from the PM4 format
4001 * used by the CP. sDMA supports copying data, writing
4002 * embedded data, solid fills, and a number of other
4003 * things. It also has support for tiling/detiling of
4004 * buffers.
4005 */
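/*
 * Every sDMA command starts with a header built by the SDMA_PACKET(op,
 * sub_op, extra) macro from cikd.h.  As an illustrative sketch (the same
 * sequence the ring test below emits), a linear write of a single dword
 * looks like:
 *
 *	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
 *					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
 *	radeon_ring_write(ring, dst_addr & 0xfffffffc);    dst bits 31:0
 *	radeon_ring_write(ring, upper_32_bits(dst_addr));  dst bits 63:32
 *	radeon_ring_write(ring, 1);                        dword count
 *	radeon_ring_write(ring, value);                    payload
 */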
4006 /**
4007 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
4008 *
4009 * @rdev: radeon_device pointer
4010 * @ib: IB object to schedule
4011 *
4012 * Schedule an IB in the DMA ring (CIK).
4013 */
4014 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
4015 struct radeon_ib *ib)
4016 {
4017 struct radeon_ring *ring = &rdev->ring[ib->ring];
4018 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
4019
4020 if (rdev->wb.enabled) {
4021 u32 next_rptr = ring->wptr + 5;
4022 while ((next_rptr & 7) != 4)
4023 next_rptr++;
4024 next_rptr += 4;
4025 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4026 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4027 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
4028 radeon_ring_write(ring, 1); /* number of DWs to follow */
4029 radeon_ring_write(ring, next_rptr);
4030 }
4031
4032 /* pad with NOPs to (wptr & 7) == 4 so the 4 DW IB packet ends on an 8 DW boundary */
4033 while ((ring->wptr & 7) != 4)
4034 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
4035 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
4036 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
4037 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
4038 radeon_ring_write(ring, ib->length_dw);
4039
4040 }
4041
4042 /**
4043 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
4044 *
4045 * @rdev: radeon_device pointer
4046 * @fence: radeon fence object
4047 *
4048 * Add a DMA fence packet to the ring to write
4049 * the fence seq number and a DMA trap packet to generate
4050 * an interrupt if needed (CIK).
4051 */
4052 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
4053 struct radeon_fence *fence)
4054 {
4055 struct radeon_ring *ring = &rdev->ring[fence->ring];
4056 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4057 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4058 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4059 u32 ref_and_mask;
4060
4061 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
4062 ref_and_mask = SDMA0;
4063 else
4064 ref_and_mask = SDMA1;
4065
4066 /* write the fence */
4067 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
4068 radeon_ring_write(ring, addr & 0xffffffff);
4069 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4070 radeon_ring_write(ring, fence->seq);
4071 /* generate an interrupt */
4072 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
4073 /* flush HDP */
4074 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4075 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4076 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4077 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4078 radeon_ring_write(ring, ref_and_mask); /* MASK */
4079 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4080 }
4081
4082 /**
4083 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
4084 *
4085 * @rdev: radeon_device pointer
4086 * @ring: radeon_ring structure holding ring information
4087 * @semaphore: radeon semaphore object
4088 * @emit_wait: wait or signal semaphore
4089 *
4090 * Add a DMA semaphore packet to the ring to wait on or signal
4091 * other rings (CIK).
4092 */
4093 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
4094 struct radeon_ring *ring,
4095 struct radeon_semaphore *semaphore,
4096 bool emit_wait)
4097 {
4098 u64 addr = semaphore->gpu_addr;
4099 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
4100
4101 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
4102 radeon_ring_write(ring, addr & 0xfffffff8);
4103 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
4104 }
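/*
 * Illustrative usage (normally driven via radeon_semaphore_sync_rings()):
 * to make one sDMA ring wait for another, emit a signal on the producer
 * and a wait on the consumer against the same 8-byte aligned address:
 *
 *	cik_sdma_semaphore_ring_emit(rdev, prod_ring, sem, false);  signal
 *	cik_sdma_semaphore_ring_emit(rdev, cons_ring, sem, true);   wait
 */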
4105
4106 /**
4107 * cik_sdma_gfx_stop - stop the gfx async dma engines
4108 *
4109 * @rdev: radeon_device pointer
4110 *
4111 * Stop the gfx async dma ring buffers (CIK).
4112 */
4113 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
4114 {
4115 u32 rb_cntl, reg_offset;
4116 int i;
4117
4118 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4119
4120 for (i = 0; i < 2; i++) {
4121 if (i == 0)
4122 reg_offset = SDMA0_REGISTER_OFFSET;
4123 else
4124 reg_offset = SDMA1_REGISTER_OFFSET;
4125 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
4126 rb_cntl &= ~SDMA_RB_ENABLE;
4127 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4128 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
4129 }
4130 }
4131
4132 /**
4133 * cik_sdma_rlc_stop - stop the compute async dma engines
4134 *
4135 * @rdev: radeon_device pointer
4136 *
4137 * Stop the compute async dma queues (CIK).
4138 */
4139 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
4140 {
4141 /* XXX todo */
4142 }
4143
4144 /**
4145 * cik_sdma_enable - halt or unhalt the async dma engines
4146 *
4147 * @rdev: radeon_device pointer
4148 * @enable: enable/disable the DMA MEs.
4149 *
4150 * Halt or unhalt the async dma engines (CIK).
4151 */
4152 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
4153 {
4154 u32 me_cntl, reg_offset;
4155 int i;
4156
4157 for (i = 0; i < 2; i++) {
4158 if (i == 0)
4159 reg_offset = SDMA0_REGISTER_OFFSET;
4160 else
4161 reg_offset = SDMA1_REGISTER_OFFSET;
4162 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
4163 if (enable)
4164 me_cntl &= ~SDMA_HALT;
4165 else
4166 me_cntl |= SDMA_HALT;
4167 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
4168 }
4169 }
4170
4171 /**
4172 * cik_sdma_gfx_resume - setup and start the async dma engines
4173 *
4174 * @rdev: radeon_device pointer
4175 *
4176 * Set up the gfx DMA ring buffers and enable them (CIK).
4177 * Returns 0 for success, error for failure.
4178 */
4179 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
4180 {
4181 struct radeon_ring *ring;
4182 u32 rb_cntl, ib_cntl;
4183 u32 rb_bufsz;
4184 u32 reg_offset, wb_offset;
4185 int i, r;
4186
4187 for (i = 0; i < 2; i++) {
4188 if (i == 0) {
4189 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
4190 reg_offset = SDMA0_REGISTER_OFFSET;
4191 wb_offset = R600_WB_DMA_RPTR_OFFSET;
4192 } else {
4193 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
4194 reg_offset = SDMA1_REGISTER_OFFSET;
4195 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
4196 }
4197
4198 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
4199 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
4200
4201 /* Set ring buffer size in dwords */
4202 rb_bufsz = drm_order(ring->ring_size / 4);
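/* e.g. a 64 KiB ring: drm_order(65536 / 4) = 14, i.e. a 2^14 dword ring */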
4203 rb_cntl = rb_bufsz << 1;
4204 #ifdef __BIG_ENDIAN
4205 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
4206 #endif
4207 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
4208
4209 /* Initialize the ring buffer's read and write pointers */
4210 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
4211 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
4212
4213 /* set the wb address whether it's enabled or not */
4214 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
4215 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
4216 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
4217 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
4218
4219 if (rdev->wb.enabled)
4220 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
4221
4222 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
4223 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
4224
4225 ring->wptr = 0;
4226 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
4227
4228 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
4229
4230 /* enable DMA RB */
4231 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
4232
4233 ib_cntl = SDMA_IB_ENABLE;
4234 #ifdef __BIG_ENDIAN
4235 ib_cntl |= SDMA_IB_SWAP_ENABLE;
4236 #endif
4237 /* enable DMA IBs */
4238 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
4239
4240 ring->ready = true;
4241
4242 r = radeon_ring_test(rdev, ring->idx, ring);
4243 if (r) {
4244 ring->ready = false;
4245 return r;
4246 }
4247 }
4248
4249 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4250
4251 return 0;
4252 }
4253
4254 /**
4255 * cik_sdma_rlc_resume - setup and start the async dma engines
4256 *
4257 * @rdev: radeon_device pointer
4258 *
4259 * Set up the compute DMA queues and enable them (CIK).
4260 * Returns 0 for success, error for failure.
4261 */
4262 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
4263 {
4264 /* XXX todo */
4265 return 0;
4266 }
4267
4268 /**
4269 * cik_sdma_load_microcode - load the sDMA ME ucode
4270 *
4271 * @rdev: radeon_device pointer
4272 *
4273 * Loads the sDMA0/1 ucode.
4274 * Returns 0 for success, -EINVAL if the ucode is not available.
4275 */
4276 static int cik_sdma_load_microcode(struct radeon_device *rdev)
4277 {
4278 const __be32 *fw_data;
4279 int i;
4280
4281 if (!rdev->sdma_fw)
4282 return -EINVAL;
4283
4284 /* stop the gfx rings and rlc compute queues */
4285 cik_sdma_gfx_stop(rdev);
4286 cik_sdma_rlc_stop(rdev);
4287
4288 /* halt the MEs */
4289 cik_sdma_enable(rdev, false);
4290
4291 /* sdma0 */
4292 fw_data = (const __be32 *)rdev->sdma_fw->data;
4293 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4294 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4295 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4296 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4297
4298 /* sdma1 */
4299 fw_data = (const __be32 *)rdev->sdma_fw->data;
4300 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4301 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
4302 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
4303 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
4304
4305 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
4306 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
4307 return 0;
4308 }
4309
4310 /**
4311 * cik_sdma_resume - setup and start the async dma engines
4312 *
4313 * @rdev: radeon_device pointer
4314 *
4315 * Set up the DMA engines and enable them (CIK).
4316 * Returns 0 for success, error for failure.
4317 */
4318 static int cik_sdma_resume(struct radeon_device *rdev)
4319 {
4320 int r;
4321
4322 /* Reset dma */
4323 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
4324 RREG32(SRBM_SOFT_RESET);
4325 udelay(50);
4326 WREG32(SRBM_SOFT_RESET, 0);
4327 RREG32(SRBM_SOFT_RESET);
4328
4329 r = cik_sdma_load_microcode(rdev);
4330 if (r)
4331 return r;
4332
4333 /* unhalt the MEs */
4334 cik_sdma_enable(rdev, true);
4335
4336 /* start the gfx rings and rlc compute queues */
4337 r = cik_sdma_gfx_resume(rdev);
4338 if (r)
4339 return r;
4340 r = cik_sdma_rlc_resume(rdev);
4341 if (r)
4342 return r;
4343
4344 return 0;
4345 }
4346
4347 /**
4348 * cik_sdma_fini - tear down the async dma engines
4349 *
4350 * @rdev: radeon_device pointer
4351 *
4352 * Stop the async dma engines and free the rings (CIK).
4353 */
4354 static void cik_sdma_fini(struct radeon_device *rdev)
4355 {
4356 /* stop the gfx rings and rlc compute queues */
4357 cik_sdma_gfx_stop(rdev);
4358 cik_sdma_rlc_stop(rdev);
4359 /* halt the MEs */
4360 cik_sdma_enable(rdev, false);
4361 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
4362 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
4363 /* XXX - compute dma queue tear down */
4364 }
4365
4366 /**
4367 * cik_copy_dma - copy pages using the DMA engine
4368 *
4369 * @rdev: radeon_device pointer
4370 * @src_offset: src GPU address
4371 * @dst_offset: dst GPU address
4372 * @num_gpu_pages: number of GPU pages to xfer
4373 * @fence: radeon fence object
4374 *
4375 * Copy GPU pages using the DMA engine (CIK).
4376 * Used by the radeon ttm implementation to move pages if
4377 * registered as the asic copy callback.
4378 */
4379 int cik_copy_dma(struct radeon_device *rdev,
4380 uint64_t src_offset, uint64_t dst_offset,
4381 unsigned num_gpu_pages,
4382 struct radeon_fence **fence)
4383 {
4384 struct radeon_semaphore *sem = NULL;
4385 int ring_index = rdev->asic->copy.dma_ring_index;
4386 struct radeon_ring *ring = &rdev->ring[ring_index];
4387 u32 size_in_bytes, cur_size_in_bytes;
4388 int i, num_loops;
4389 int r = 0;
4390
4391 r = radeon_semaphore_create(rdev, &sem);
4392 if (r) {
4393 DRM_ERROR("radeon: moving bo (%d).\n", r);
4394 return r;
4395 }
4396
4397 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4398 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
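/* each copy packet is 7 DWs and moves at most 0x1fffff bytes (~2 MiB);
 * the extra 14 DWs leave room for the semaphore sync and the fence below */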
4399 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
4400 if (r) {
4401 DRM_ERROR("radeon: moving bo (%d).\n", r);
4402 radeon_semaphore_free(rdev, &sem, NULL);
4403 return r;
4404 }
4405
4406 if (radeon_fence_need_sync(*fence, ring->idx)) {
4407 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
4408 ring->idx);
4409 radeon_fence_note_sync(*fence, ring->idx);
4410 } else {
4411 radeon_semaphore_free(rdev, &sem, NULL);
4412 }
4413
4414 for (i = 0; i < num_loops; i++) {
4415 cur_size_in_bytes = size_in_bytes;
4416 if (cur_size_in_bytes > 0x1fffff)
4417 cur_size_in_bytes = 0x1fffff;
4418 size_in_bytes -= cur_size_in_bytes;
4419 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
4420 radeon_ring_write(ring, cur_size_in_bytes);
4421 radeon_ring_write(ring, 0); /* src/dst endian swap */
4422 radeon_ring_write(ring, src_offset & 0xffffffff);
4423 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
4424 radeon_ring_write(ring, dst_offset & 0xfffffffc);
4425 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
4426 src_offset += cur_size_in_bytes;
4427 dst_offset += cur_size_in_bytes;
4428 }
4429
4430 r = radeon_fence_emit(rdev, fence, ring->idx);
4431 if (r) {
4432 radeon_ring_unlock_undo(rdev, ring);
4433 return r;
4434 }
4435
4436 radeon_ring_unlock_commit(rdev, ring);
4437 radeon_semaphore_free(rdev, &sem, *fence);
4438
4439 return r;
4440 }
4441
4442 /**
4443 * cik_sdma_ring_test - simple async dma engine test
4444 *
4445 * @rdev: radeon_device pointer
4446 * @ring: radeon_ring structure holding ring information
4447 *
4448 * Test the DMA engine by using it to write a
4449 * value to memory (CIK).
4450 * Returns 0 for success, error for failure.
4451 */
4452 int cik_sdma_ring_test(struct radeon_device *rdev,
4453 struct radeon_ring *ring)
4454 {
4455 unsigned i;
4456 int r;
4457 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4458 u32 tmp;
4459
4460 if (!ptr) {
4461 DRM_ERROR("invalid vram scratch pointer\n");
4462 return -EINVAL;
4463 }
4464
4465 tmp = 0xCAFEDEAD;
4466 writel(tmp, ptr);
4467
4468 r = radeon_ring_lock(rdev, ring, 4);
4469 if (r) {
4470 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
4471 return r;
4472 }
4473 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
4474 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
4475 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
4476 radeon_ring_write(ring, 1); /* number of DWs to follow */
4477 radeon_ring_write(ring, 0xDEADBEEF);
4478 radeon_ring_unlock_commit(rdev, ring);
4479
4480 for (i = 0; i < rdev->usec_timeout; i++) {
4481 tmp = readl(ptr);
4482 if (tmp == 0xDEADBEEF)
4483 break;
4484 DRM_UDELAY(1);
4485 }
4486
4487 if (i < rdev->usec_timeout) {
4488 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
4489 } else {
4490 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
4491 ring->idx, tmp);
4492 r = -EINVAL;
4493 }
4494 return r;
4495 }
4496
4497 /**
4498 * cik_sdma_ib_test - test an IB on the DMA engine
4499 *
4500 * @rdev: radeon_device pointer
4501 * @ring: radeon_ring structure holding ring information
4502 *
4503 * Test a simple IB in the DMA ring (CIK).
4504 * Returns 0 on success, error on failure.
4505 */
4506 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4507 {
4508 struct radeon_ib ib;
4509 unsigned i;
4510 int r;
4511 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
4512 u32 tmp = 0;
4513
4514 if (!ptr) {
4515 DRM_ERROR("invalid vram scratch pointer\n");
4516 return -EINVAL;
4517 }
4518
4519 tmp = 0xCAFEDEAD;
4520 writel(tmp, ptr);
4521
4522 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4523 if (r) {
4524 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4525 return r;
4526 }
4527
4528 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4529 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
4530 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
4531 ib.ptr[3] = 1;
4532 ib.ptr[4] = 0xDEADBEEF;
4533 ib.length_dw = 5;
4534
4535 r = radeon_ib_schedule(rdev, &ib, NULL);
4536 if (r) {
4537 radeon_ib_free(rdev, &ib);
4538 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4539 return r;
4540 }
4541 r = radeon_fence_wait(ib.fence, false);
4542 if (r) {
4543 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4544 return r;
4545 }
4546 for (i = 0; i < rdev->usec_timeout; i++) {
4547 tmp = readl(ptr);
4548 if (tmp == 0xDEADBEEF)
4549 break;
4550 DRM_UDELAY(1);
4551 }
4552 if (i < rdev->usec_timeout) {
4553 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4554 } else {
4555 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
4556 r = -EINVAL;
4557 }
4558 radeon_ib_free(rdev, &ib);
4559 return r;
4560 }
4561
4562
4563 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4564 {
4565 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
4566 RREG32(GRBM_STATUS));
4567 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
4568 RREG32(GRBM_STATUS2));
4569 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
4570 RREG32(GRBM_STATUS_SE0));
4571 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
4572 RREG32(GRBM_STATUS_SE1));
4573 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
4574 RREG32(GRBM_STATUS_SE2));
4575 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
4576 RREG32(GRBM_STATUS_SE3));
4577 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
4578 RREG32(SRBM_STATUS));
4579 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
4580 RREG32(SRBM_STATUS2));
4581 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
4582 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4583 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
4584 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4585 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4586 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
4587 RREG32(CP_STALLED_STAT1));
4588 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
4589 RREG32(CP_STALLED_STAT2));
4590 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
4591 RREG32(CP_STALLED_STAT3));
4592 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
4593 RREG32(CP_CPF_BUSY_STAT));
4594 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
4595 RREG32(CP_CPF_STALLED_STAT1));
4596 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4597 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4598 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
4599 RREG32(CP_CPC_STALLED_STAT1));
4600 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4601 }
4602
4603 /**
4604 * cik_gpu_check_soft_reset - check which blocks are busy
4605 *
4606 * @rdev: radeon_device pointer
4607 *
4608 * Check which blocks are busy and return the relevant reset
4609 * mask to be used by cik_gpu_soft_reset().
4610 * Returns a mask of the blocks to be reset.
4611 */
4612 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4613 {
4614 u32 reset_mask = 0;
4615 u32 tmp;
4616
4617 /* GRBM_STATUS */
4618 tmp = RREG32(GRBM_STATUS);
4619 if (tmp & (PA_BUSY | SC_BUSY |
4620 BCI_BUSY | SX_BUSY |
4621 TA_BUSY | VGT_BUSY |
4622 DB_BUSY | CB_BUSY |
4623 GDS_BUSY | SPI_BUSY |
4624 IA_BUSY | IA_BUSY_NO_DMA))
4625 reset_mask |= RADEON_RESET_GFX;
4626
4627 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4628 reset_mask |= RADEON_RESET_CP;
4629
4630 /* GRBM_STATUS2 */
4631 tmp = RREG32(GRBM_STATUS2);
4632 if (tmp & RLC_BUSY)
4633 reset_mask |= RADEON_RESET_RLC;
4634
4635 /* SDMA0_STATUS_REG */
4636 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4637 if (!(tmp & SDMA_IDLE))
4638 reset_mask |= RADEON_RESET_DMA;
4639
4640 /* SDMA1_STATUS_REG */
4641 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4642 if (!(tmp & SDMA_IDLE))
4643 reset_mask |= RADEON_RESET_DMA1;
4644
4645 /* SRBM_STATUS2 */
4646 tmp = RREG32(SRBM_STATUS2);
4647 if (tmp & SDMA_BUSY)
4648 reset_mask |= RADEON_RESET_DMA;
4649
4650 if (tmp & SDMA1_BUSY)
4651 reset_mask |= RADEON_RESET_DMA1;
4652
4653 /* SRBM_STATUS */
4654 tmp = RREG32(SRBM_STATUS);
4655
4656 if (tmp & IH_BUSY)
4657 reset_mask |= RADEON_RESET_IH;
4658
4659 if (tmp & SEM_BUSY)
4660 reset_mask |= RADEON_RESET_SEM;
4661
4662 if (tmp & GRBM_RQ_PENDING)
4663 reset_mask |= RADEON_RESET_GRBM;
4664
4665 if (tmp & VMC_BUSY)
4666 reset_mask |= RADEON_RESET_VMC;
4667
4668 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4669 MCC_BUSY | MCD_BUSY))
4670 reset_mask |= RADEON_RESET_MC;
4671
4672 if (evergreen_is_display_hung(rdev))
4673 reset_mask |= RADEON_RESET_DISPLAY;
4674
4675 /* Skip MC reset as it's most likely not hung, just busy */
4676 if (reset_mask & RADEON_RESET_MC) {
4677 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4678 reset_mask &= ~RADEON_RESET_MC;
4679 }
4680
4681 return reset_mask;
4682 }
4683
4684 /**
4685 * cik_gpu_soft_reset - soft reset GPU
4686 *
4687 * @rdev: radeon_device pointer
4688 * @reset_mask: mask of which blocks to reset
4689 *
4690 * Soft reset the blocks specified in @reset_mask.
4691 */
4692 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4693 {
4694 struct evergreen_mc_save save;
4695 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4696 u32 tmp;
4697
4698 if (reset_mask == 0)
4699 return;
4700
4701 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4702
4703 cik_print_gpu_status_regs(rdev);
4704 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
4705 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4706 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4707 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4708
4709 /* stop the rlc */
4710 cik_rlc_stop(rdev);
4711
4712 /* Disable GFX parsing/prefetching */
4713 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4714
4715 /* Disable MEC parsing/prefetching */
4716 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4717
4718 if (reset_mask & RADEON_RESET_DMA) {
4719 /* sdma0 */
4720 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4721 tmp |= SDMA_HALT;
4722 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4723 }
4724 if (reset_mask & RADEON_RESET_DMA1) {
4725 /* sdma1 */
4726 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4727 tmp |= SDMA_HALT;
4728 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4729 }
4730
4731 evergreen_mc_stop(rdev, &save);
4732 if (evergreen_mc_wait_for_idle(rdev)) {
4733 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4734 }
4735
4736 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4737 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4738
4739 if (reset_mask & RADEON_RESET_CP) {
4740 grbm_soft_reset |= SOFT_RESET_CP;
4741
4742 srbm_soft_reset |= SOFT_RESET_GRBM;
4743 }
4744
4745 if (reset_mask & RADEON_RESET_DMA)
4746 srbm_soft_reset |= SOFT_RESET_SDMA;
4747
4748 if (reset_mask & RADEON_RESET_DMA1)
4749 srbm_soft_reset |= SOFT_RESET_SDMA1;
4750
4751 if (reset_mask & RADEON_RESET_DISPLAY)
4752 srbm_soft_reset |= SOFT_RESET_DC;
4753
4754 if (reset_mask & RADEON_RESET_RLC)
4755 grbm_soft_reset |= SOFT_RESET_RLC;
4756
4757 if (reset_mask & RADEON_RESET_SEM)
4758 srbm_soft_reset |= SOFT_RESET_SEM;
4759
4760 if (reset_mask & RADEON_RESET_IH)
4761 srbm_soft_reset |= SOFT_RESET_IH;
4762
4763 if (reset_mask & RADEON_RESET_GRBM)
4764 srbm_soft_reset |= SOFT_RESET_GRBM;
4765
4766 if (reset_mask & RADEON_RESET_VMC)
4767 srbm_soft_reset |= SOFT_RESET_VMC;
4768
4769 if (!(rdev->flags & RADEON_IS_IGP)) {
4770 if (reset_mask & RADEON_RESET_MC)
4771 srbm_soft_reset |= SOFT_RESET_MC;
4772 }
4773
4774 if (grbm_soft_reset) {
4775 tmp = RREG32(GRBM_SOFT_RESET);
4776 tmp |= grbm_soft_reset;
4777 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4778 WREG32(GRBM_SOFT_RESET, tmp);
4779 tmp = RREG32(GRBM_SOFT_RESET);
4780
4781 udelay(50);
4782
4783 tmp &= ~grbm_soft_reset;
4784 WREG32(GRBM_SOFT_RESET, tmp);
4785 tmp = RREG32(GRBM_SOFT_RESET);
4786 }
4787
4788 if (srbm_soft_reset) {
4789 tmp = RREG32(SRBM_SOFT_RESET);
4790 tmp |= srbm_soft_reset;
4791 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4792 WREG32(SRBM_SOFT_RESET, tmp);
4793 tmp = RREG32(SRBM_SOFT_RESET);
4794
4795 udelay(50);
4796
4797 tmp &= ~srbm_soft_reset;
4798 WREG32(SRBM_SOFT_RESET, tmp);
4799 tmp = RREG32(SRBM_SOFT_RESET);
4800 }
4801
4802 /* Wait a little for things to settle down */
4803 udelay(50);
4804
4805 evergreen_mc_resume(rdev, &save);
4806 udelay(50);
4807
4808 cik_print_gpu_status_regs(rdev);
4809 }
4810
4811 /**
4812 * cik_asic_reset - soft reset GPU
4813 *
4814 * @rdev: radeon_device pointer
4815 *
4816 * Look up which blocks are hung and attempt
4817 * to reset them.
4818 * Returns 0 for success.
4819 */
4820 int cik_asic_reset(struct radeon_device *rdev)
4821 {
4822 u32 reset_mask;
4823
4824 reset_mask = cik_gpu_check_soft_reset(rdev);
4825
4826 if (reset_mask)
4827 r600_set_bios_scratch_engine_hung(rdev, true);
4828
4829 cik_gpu_soft_reset(rdev, reset_mask);
4830
4831 reset_mask = cik_gpu_check_soft_reset(rdev);
4832
4833 if (!reset_mask)
4834 r600_set_bios_scratch_engine_hung(rdev, false);
4835
4836 return 0;
4837 }
4838
4839 /**
4840 * cik_gfx_is_lockup - check if the 3D engine is locked up
4841 *
4842 * @rdev: radeon_device pointer
4843 * @ring: radeon_ring structure holding ring information
4844 *
4845 * Check if the 3D engine is locked up (CIK).
4846 * Returns true if the engine is locked, false if not.
4847 */
4848 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4849 {
4850 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4851
4852 if (!(reset_mask & (RADEON_RESET_GFX |
4853 RADEON_RESET_COMPUTE |
4854 RADEON_RESET_CP))) {
4855 radeon_ring_lockup_update(ring);
4856 return false;
4857 }
4858 /* force CP activities */
4859 radeon_ring_force_activity(rdev, ring);
4860 return radeon_ring_test_lockup(rdev, ring);
4861 }
4862
4863 /**
4864 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4865 *
4866 * @rdev: radeon_device pointer
4867 * @ring: radeon_ring structure holding ring information
4868 *
4869 * Check if the async DMA engine is locked up (CIK).
4870 * Returns true if the engine appears to be locked up, false if not.
4871 */
4872 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4873 {
4874 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4875 u32 mask;
4876
4877 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4878 mask = RADEON_RESET_DMA;
4879 else
4880 mask = RADEON_RESET_DMA1;
4881
4882 if (!(reset_mask & mask)) {
4883 radeon_ring_lockup_update(ring);
4884 return false;
4885 }
4886 /* force ring activities */
4887 radeon_ring_force_activity(rdev, ring);
4888 return radeon_ring_test_lockup(rdev, ring);
4889 }
4890
4891 /* MC */
4892 /**
4893 * cik_mc_program - program the GPU memory controller
4894 *
4895 * @rdev: radeon_device pointer
4896 *
4897 * Set the location of vram, gart, and AGP in the GPU's
4898 * physical address space (CIK).
4899 */
4900 static void cik_mc_program(struct radeon_device *rdev)
4901 {
4902 struct evergreen_mc_save save;
4903 u32 tmp;
4904 int i, j;
4905
4906 /* Initialize HDP */
4907 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4908 WREG32((0x2c14 + j), 0x00000000);
4909 WREG32((0x2c18 + j), 0x00000000);
4910 WREG32((0x2c1c + j), 0x00000000);
4911 WREG32((0x2c20 + j), 0x00000000);
4912 WREG32((0x2c24 + j), 0x00000000);
4913 }
4914 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4915
4916 evergreen_mc_stop(rdev, &save);
4917 if (radeon_mc_wait_for_idle(rdev)) {
4918 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4919 }
4920 /* Lock out access through VGA aperture */
4921 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4922 /* Update configuration */
4923 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4924 rdev->mc.vram_start >> 12);
4925 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4926 rdev->mc.vram_end >> 12);
4927 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4928 rdev->vram_scratch.gpu_addr >> 12);
4929 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4930 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4931 WREG32(MC_VM_FB_LOCATION, tmp);
4932 /* XXX double check these! */
4933 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4934 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4935 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4936 WREG32(MC_VM_AGP_BASE, 0);
4937 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4938 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4939 if (radeon_mc_wait_for_idle(rdev)) {
4940 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4941 }
4942 evergreen_mc_resume(rdev, &save);
4943 /* we need to own VRAM, so turn off the VGA renderer here
4944 * to stop it from overwriting our objects */
4945 rv515_vga_render_disable(rdev);
4946 }
4947
4948 /**
4949 * cik_mc_init - initialize the memory controller driver params
4950 *
4951 * @rdev: radeon_device pointer
4952 *
4953 * Look up the amount of vram, vram width, and decide how to place
4954 * vram and gart within the GPU's physical address space (CIK).
4955 * Returns 0 for success.
4956 */
4957 static int cik_mc_init(struct radeon_device *rdev)
4958 {
4959 u32 tmp;
4960 int chansize, numchan;
4961
4962 /* Get VRAM information */
4963 rdev->mc.vram_is_ddr = true;
4964 tmp = RREG32(MC_ARB_RAMCFG);
4965 if (tmp & CHANSIZE_MASK) {
4966 chansize = 64;
4967 } else {
4968 chansize = 32;
4969 }
4970 tmp = RREG32(MC_SHARED_CHMAP);
4971 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4972 case 0:
4973 default:
4974 numchan = 1;
4975 break;
4976 case 1:
4977 numchan = 2;
4978 break;
4979 case 2:
4980 numchan = 4;
4981 break;
4982 case 3:
4983 numchan = 8;
4984 break;
4985 case 4:
4986 numchan = 3;
4987 break;
4988 case 5:
4989 numchan = 6;
4990 break;
4991 case 6:
4992 numchan = 10;
4993 break;
4994 case 7:
4995 numchan = 12;
4996 break;
4997 case 8:
4998 numchan = 16;
4999 break;
5000 }
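/* e.g. 4 channels x 64-bit chansize = a 256-bit memory interface */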
5001 rdev->mc.vram_width = numchan * chansize;
5002 /* Could the aperture size report 0? */
5003 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5004 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5005 /* size in MB on CIK */
5006 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5007 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
5008 rdev->mc.visible_vram_size = rdev->mc.aper_size;
5009 si_vram_gtt_location(rdev, &rdev->mc);
5010 radeon_update_bandwidth_info(rdev);
5011
5012 return 0;
5013 }
5014
5015 /*
5016 * GART
5017 * VMID 0 is the physical GPU addresses as used by the kernel.
5018 * VMIDs 1-15 are used for userspace clients and are handled
5019 * by the radeon vm/hsa code.
5020 */
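/*
 * Conceptually, a VMID 0 address inside the GART aperture resolves through
 * the single-level table programmed below.  A rough sketch of the lookup
 * (illustrative only, assuming 4 KiB GPU pages):
 *
 *	pte = gart_table[(gpu_addr - rdev->mc.gtt_start) >> RADEON_GPU_PAGE_SHIFT];
 *	sys_addr = (pte & ~0xfffULL) | (gpu_addr & 0xfff);
 */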
5021 /**
5022 * cik_pcie_gart_tlb_flush - gart tlb flush callback
5023 *
5024 * @rdev: radeon_device pointer
5025 *
5026 * Flush the TLB for the VMID 0 page table (CIK).
5027 */
5028 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5029 {
5030 /* flush hdp cache */
5031 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5032
5033 /* bits 0-15 are the VM contexts 0-15; writing 0x1 flushes only context 0 */
5034 WREG32(VM_INVALIDATE_REQUEST, 0x1);
5035 }
5036
5037 /**
5038 * cik_pcie_gart_enable - gart enable
5039 *
5040 * @rdev: radeon_device pointer
5041 *
5042 * This sets up the TLBs, programs the page tables for VMID0,
5043 * sets up the hw for VMIDs 1-15 which are allocated on
5044 * demand, and sets up the global locations for the LDS, GDS,
5045 * and GPUVM for FSA64 clients (CIK).
5046 * Returns 0 for success, errors for failure.
5047 */
5048 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5049 {
5050 int r, i;
5051
5052 if (rdev->gart.robj == NULL) {
5053 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5054 return -EINVAL;
5055 }
5056 r = radeon_gart_table_vram_pin(rdev);
5057 if (r)
5058 return r;
5059 radeon_gart_restore(rdev);
5060 /* Setup TLB control */
5061 WREG32(MC_VM_MX_L1_TLB_CNTL,
5062 (0xA << 7) |
5063 ENABLE_L1_TLB |
5064 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5065 ENABLE_ADVANCED_DRIVER_MODEL |
5066 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5067 /* Setup L2 cache */
5068 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5069 ENABLE_L2_FRAGMENT_PROCESSING |
5070 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5071 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5072 EFFECTIVE_L2_QUEUE_SIZE(7) |
5073 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5074 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5075 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5076 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5077 /* setup context0 */
5078 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5079 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5080 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5081 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5082 (u32)(rdev->dummy_page.addr >> 12));
5083 WREG32(VM_CONTEXT0_CNTL2, 0);
5084 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5085 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5086
5087 WREG32(0x15D4, 0);
5088 WREG32(0x15D8, 0);
5089 WREG32(0x15DC, 0);
5090
5091 /* empty context1-15 */
5092 /* FIXME: start with 4GB; once we use a 2 level page table, switch to
5093 * the full vm address space
5094 */
5095 /* set vm size, must be a multiple of 4 */
5096 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5097 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5098 for (i = 1; i < 16; i++) {
5099 if (i < 8)
5100 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5101 rdev->gart.table_addr >> 12);
5102 else
5103 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5104 rdev->gart.table_addr >> 12);
5105 }
5106
5107 /* enable context1-15 */
5108 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5109 (u32)(rdev->dummy_page.addr >> 12));
5110 WREG32(VM_CONTEXT1_CNTL2, 4);
5111 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5112 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5113 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5114 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5115 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5116 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5117 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5118 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5119 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5120 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5121 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5122 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5123 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5124
5125 /* TC cache setup ??? */
5126 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
5127 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
5128 WREG32(TC_CFG_L1_STORE_POLICY, 0);
5129
5130 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
5131 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
5132 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
5133 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
5134 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
5135
5136 WREG32(TC_CFG_L1_VOLATILE, 0);
5137 WREG32(TC_CFG_L2_VOLATILE, 0);
5138
5139 if (rdev->family == CHIP_KAVERI) {
5140 u32 tmp = RREG32(CHUB_CONTROL);
5141 tmp &= ~BYPASS_VM;
5142 WREG32(CHUB_CONTROL, tmp);
5143 }
5144
5145 /* XXX SH_MEM regs */
5146 /* where to put LDS, scratch, GPUVM in FSA64 space */
5147 mutex_lock(&rdev->srbm_mutex);
5148 for (i = 0; i < 16; i++) {
5149 cik_srbm_select(rdev, 0, 0, 0, i);
5150 /* CP and shaders */
5151 WREG32(SH_MEM_CONFIG, 0);
5152 WREG32(SH_MEM_APE1_BASE, 1);
5153 WREG32(SH_MEM_APE1_LIMIT, 0);
5154 WREG32(SH_MEM_BASES, 0);
5155 /* SDMA GFX */
5156 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5157 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5158 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5159 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5160 /* XXX SDMA RLC - todo */
5161 }
5162 cik_srbm_select(rdev, 0, 0, 0, 0);
5163 mutex_unlock(&rdev->srbm_mutex);
5164
5165 cik_pcie_gart_tlb_flush(rdev);
5166 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5167 (unsigned)(rdev->mc.gtt_size >> 20),
5168 (unsigned long long)rdev->gart.table_addr);
5169 rdev->gart.ready = true;
5170 return 0;
5171 }
5172
5173 /**
5174 * cik_pcie_gart_disable - gart disable
5175 *
5176 * @rdev: radeon_device pointer
5177 *
5178 * This disables all VM page tables (CIK).
5179 */
5180 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5181 {
5182 /* Disable all tables */
5183 WREG32(VM_CONTEXT0_CNTL, 0);
5184 WREG32(VM_CONTEXT1_CNTL, 0);
5185 /* Setup TLB control */
5186 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5187 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5188 /* Setup L2 cache */
5189 WREG32(VM_L2_CNTL,
5190 ENABLE_L2_FRAGMENT_PROCESSING |
5191 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5192 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5193 EFFECTIVE_L2_QUEUE_SIZE(7) |
5194 CONTEXT1_IDENTITY_ACCESS_MODE(1));
5195 WREG32(VM_L2_CNTL2, 0);
5196 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5197 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5198 radeon_gart_table_vram_unpin(rdev);
5199 }
5200
5201 /**
5202 * cik_pcie_gart_fini - vm fini callback
5203 *
5204 * @rdev: radeon_device pointer
5205 *
5206 * Tears down the driver GART/VM setup (CIK).
5207 */
5208 static void cik_pcie_gart_fini(struct radeon_device *rdev)
5209 {
5210 cik_pcie_gart_disable(rdev);
5211 radeon_gart_table_vram_free(rdev);
5212 radeon_gart_fini(rdev);
5213 }
5214
5215 /* vm parser */
5216 /**
5217 * cik_ib_parse - vm ib_parse callback
5218 *
5219 * @rdev: radeon_device pointer
5220 * @ib: indirect buffer pointer
5221 *
5222 * CIK uses hw IB checking so this is a nop (CIK).
5223 */
5224 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
5225 {
5226 return 0;
5227 }
5228
5229 /*
5230 * vm
5231 * VMID 0 is the physical GPU addresses as used by the kernel.
5232 * VMIDs 1-15 are used for userspace clients and are handled
5233 * by the radeon vm/hsa code.
5234 */
5235 /**
5236 * cik_vm_init - cik vm init callback
5237 *
5238 * @rdev: radeon_device pointer
5239 *
5240 * Inits cik specific vm parameters (number of VMs, base of vram for
5241 * VMIDs 1-15) (CIK).
5242 * Returns 0 for success.
5243 */
5244 int cik_vm_init(struct radeon_device *rdev)
5245 {
5246 /* number of VMs */
5247 rdev->vm_manager.nvm = 16;
5248 /* base offset of vram pages */
5249 if (rdev->flags & RADEON_IS_IGP) {
5250 u64 tmp = RREG32(MC_VM_FB_OFFSET);
5251 tmp <<= 22;
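/* MC_VM_FB_OFFSET presumably counts 4 MiB units, hence the shift by 22 */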
5252 rdev->vm_manager.vram_base_offset = tmp;
5253 } else
5254 rdev->vm_manager.vram_base_offset = 0;
5255
5256 return 0;
5257 }
5258
5259 /**
5260 * cik_vm_fini - cik vm fini callback
5261 *
5262 * @rdev: radeon_device pointer
5263 *
5264 * Tear down any asic specific VM setup (CIK).
5265 */
5266 void cik_vm_fini(struct radeon_device *rdev)
5267 {
5268 }
5269
5270 /**
5271 * cik_vm_decode_fault - print human readable fault info
5272 *
5273 * @rdev: radeon_device pointer
5274 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5275 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
* @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
5276 *
5277 * Print human readable fault information (CIK).
5278 */
5279 static void cik_vm_decode_fault(struct radeon_device *rdev,
5280 u32 status, u32 addr, u32 mc_client)
5281 {
5282 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5283 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5284 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5285 char *block = (char *)&mc_client;
5286
5287 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5288 protections, vmid, addr,
5289 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5290 block, mc_id);
5291 }
5292
5293 /**
5294 * cik_vm_flush - cik vm flush using the CP
5295 *
5296 * @rdev: radeon_device pointer
5297 *
5298 * Update the page table base and flush the VM TLB
5299 * using the CP (CIK).
5300 */
5301 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5302 {
5303 struct radeon_ring *ring = &rdev->ring[ridx];
5304
5305 if (vm == NULL)
5306 return;
5307
5308 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5309 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5310 WRITE_DATA_DST_SEL(0)));
5311 if (vm->id < 8) {
5312 radeon_ring_write(ring,
5313 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5314 } else {
5315 radeon_ring_write(ring,
5316 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5317 }
5318 radeon_ring_write(ring, 0);
5319 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5320
5321 /* update SH_MEM_* regs */
5322 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5323 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5324 WRITE_DATA_DST_SEL(0)));
5325 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5326 radeon_ring_write(ring, 0);
5327 radeon_ring_write(ring, VMID(vm->id));
5328
5329 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5330 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5331 WRITE_DATA_DST_SEL(0)));
5332 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5333 radeon_ring_write(ring, 0);
5334
5335 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5336 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5337 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5338 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5339
5340 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5341 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5342 WRITE_DATA_DST_SEL(0)));
5343 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5344 radeon_ring_write(ring, 0);
5345 radeon_ring_write(ring, VMID(0));
5346
5347 /* HDP flush */
5348 /* We should be using the WAIT_REG_MEM packet here like in
5349 * cik_fence_ring_emit(), but it causes the CP to hang in this
5350 * context...
5351 */
5352 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5353 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5354 WRITE_DATA_DST_SEL(0)));
5355 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5356 radeon_ring_write(ring, 0);
5357 radeon_ring_write(ring, 0);
5358
5359 /* bits 0-15 are the VM contexts0-15 */
5360 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5361 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5362 WRITE_DATA_DST_SEL(0)));
5363 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5364 radeon_ring_write(ring, 0);
5365 radeon_ring_write(ring, 1 << vm->id);
5366
5367 /* compute doesn't have PFP */
5368 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5369 /* sync PFP to ME, otherwise we might get invalid PFP reads */
5370 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5371 radeon_ring_write(ring, 0x0);
5372 }
5373 }
5374
5375 /**
5376 * cik_vm_set_page - update the page tables using CP or sDMA
5377 *
5378 * @rdev: radeon_device pointer
5379 * @ib: indirect buffer to fill with commands
5380 * @pe: addr of the page entry
5381 * @addr: dst addr to write into pe
5382 * @count: number of page entries to update
5383 * @incr: increase next addr by incr bytes
5384 * @flags: access flags
5385 *
5386 * Update the page tables using CP or sDMA (CIK).
5387 */
5388 void cik_vm_set_page(struct radeon_device *rdev,
5389 struct radeon_ib *ib,
5390 uint64_t pe,
5391 uint64_t addr, unsigned count,
5392 uint32_t incr, uint32_t flags)
5393 {
5394 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
5395 uint64_t value;
5396 unsigned ndw;
5397
5398 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
5399 /* CP */
5400 while (count) {
5401 ndw = 2 + count * 2;
5402 if (ndw > 0x3FFE)
5403 ndw = 0x3FFE;
5404
5405 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
5406 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
5407 WRITE_DATA_DST_SEL(1));
5408 ib->ptr[ib->length_dw++] = pe;
5409 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5410 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
5411 if (flags & RADEON_VM_PAGE_SYSTEM) {
5412 value = radeon_vm_map_gart(rdev, addr);
5413 value &= 0xFFFFFFFFFFFFF000ULL;
5414 } else if (flags & RADEON_VM_PAGE_VALID) {
5415 value = addr;
5416 } else {
5417 value = 0;
5418 }
5419 addr += incr;
5420 value |= r600_flags;
5421 ib->ptr[ib->length_dw++] = value;
5422 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5423 }
5424 }
5425 } else {
5426 /* DMA */
5427 if (flags & RADEON_VM_PAGE_SYSTEM) {
5428 while (count) {
5429 ndw = count * 2;
5430 if (ndw > 0xFFFFE)
5431 ndw = 0xFFFFE;
5432
5433 /* for non-physically contiguous pages (system) */
5434 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
5435 ib->ptr[ib->length_dw++] = pe;
5436 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5437 ib->ptr[ib->length_dw++] = ndw;
5438 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
5439 if (flags & RADEON_VM_PAGE_SYSTEM) {
5440 value = radeon_vm_map_gart(rdev, addr);
5441 value &= 0xFFFFFFFFFFFFF000ULL;
5442 } else if (flags & RADEON_VM_PAGE_VALID) {
5443 value = addr;
5444 } else {
5445 value = 0;
5446 }
5447 addr += incr;
5448 value |= r600_flags;
5449 ib->ptr[ib->length_dw++] = value;
5450 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5451 }
5452 }
5453 } else {
5454 while (count) {
5455 ndw = count;
5456 if (ndw > 0x7FFFF)
5457 ndw = 0x7FFFF;
5458
5459 if (flags & RADEON_VM_PAGE_VALID)
5460 value = addr;
5461 else
5462 value = 0;
5463 /* for physically contiguous pages (vram) */
5464 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
5465 ib->ptr[ib->length_dw++] = pe; /* dst addr */
5466 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
5467 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
5468 ib->ptr[ib->length_dw++] = 0;
5469 ib->ptr[ib->length_dw++] = value; /* value */
5470 ib->ptr[ib->length_dw++] = upper_32_bits(value);
5471 ib->ptr[ib->length_dw++] = incr; /* increment size */
5472 ib->ptr[ib->length_dw++] = 0;
5473 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
5474 pe += ndw * 8;
5475 addr += ndw * incr;
5476 count -= ndw;
5477 }
5478 }
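/* pad the IB with sDMA NOPs out to the next 8-dword boundary */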
5479 while (ib->length_dw & 0x7)
5480 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
5481 }
5482 }
5483
5484 /**
5485 * cik_dma_vm_flush - cik vm flush using sDMA
5486 *
5487 * @rdev: radeon_device pointer
 * @ridx: index of the ring used to emit the flush
 * @vm: vm to flush
5488 *
5489 * Update the page table base and flush the VM TLB
5490 * using sDMA (CIK).
5491 */
5492 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5493 {
5494 struct radeon_ring *ring = &rdev->ring[ridx];
5495 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
5496 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
5497 u32 ref_and_mask;
5498
5499 if (vm == NULL)
5500 return;
5501
5502 if (ridx == R600_RING_TYPE_DMA_INDEX)
5503 ref_and_mask = SDMA0;
5504 else
5505 ref_and_mask = SDMA1;
5506
5507 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5508 if (vm->id < 8) {
5509 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5510 } else {
5511 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5512 }
5513 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5514
5515 /* update SH_MEM_* regs */
5516 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5517 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5518 radeon_ring_write(ring, VMID(vm->id));
5519
5520 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5521 radeon_ring_write(ring, SH_MEM_BASES >> 2);
5522 radeon_ring_write(ring, 0);
5523
5524 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5525 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
5526 radeon_ring_write(ring, 0);
5527
5528 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5529 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
5530 radeon_ring_write(ring, 1);
5531
5532 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5533 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
5534 radeon_ring_write(ring, 0);
5535
5536 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5537 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5538 radeon_ring_write(ring, VMID(0));
5539
5540 /* flush HDP */
5541 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
5542 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
5543 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
5544 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
5545 radeon_ring_write(ring, ref_and_mask); /* MASK */
5546 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
5547
5548 /* flush TLB */
5549 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
5550 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5551 radeon_ring_write(ring, 1 << vm->id);
5552 }
5553
5554 /*
5555 * RLC
5556 * The RLC is a multi-purpose microengine that handles a
5557 * variety of functions, the most important of which is
5558 * the interrupt controller.
5559 */
5560 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5561 bool enable)
5562 {
5563 u32 tmp = RREG32(CP_INT_CNTL_RING0);
5564
5565 if (enable)
5566 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5567 else
5568 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5569 WREG32(CP_INT_CNTL_RING0, tmp);
5570 }
5571
5572 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5573 {
5574 u32 tmp;
5575
5576 tmp = RREG32(RLC_LB_CNTL);
5577 if (enable)
5578 tmp |= LOAD_BALANCE_ENABLE;
5579 else
5580 tmp &= ~LOAD_BALANCE_ENABLE;
5581 WREG32(RLC_LB_CNTL, tmp);
5582 }
5583
5584 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5585 {
5586 u32 i, j, k;
5587 u32 mask;
5588
5589 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5590 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5591 cik_select_se_sh(rdev, i, j);
5592 for (k = 0; k < rdev->usec_timeout; k++) {
5593 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5594 break;
5595 udelay(1);
5596 }
5597 }
5598 }
5599 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5600
5601 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5602 for (k = 0; k < rdev->usec_timeout; k++) {
5603 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5604 break;
5605 udelay(1);
5606 }
5607 }
5608
5609 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5610 {
5611 u32 tmp;
5612
5613 tmp = RREG32(RLC_CNTL);
5614 if (tmp != rlc)
5615 WREG32(RLC_CNTL, rlc);
5616 }
5617
5618 static u32 cik_halt_rlc(struct radeon_device *rdev)
5619 {
5620 u32 data, orig;
5621
5622 orig = data = RREG32(RLC_CNTL);
5623
5624 if (data & RLC_ENABLE) {
5625 u32 i;
5626
5627 data &= ~RLC_ENABLE;
5628 WREG32(RLC_CNTL, data);
5629
5630 for (i = 0; i < rdev->usec_timeout; i++) {
5631 if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5632 break;
5633 udelay(1);
5634 }
5635
5636 cik_wait_for_rlc_serdes(rdev);
5637 }
5638
5639 return orig;
5640 }
5641
5642 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5643 {
5644 u32 tmp, i, mask;
5645
5646 tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5647 WREG32(RLC_GPR_REG2, tmp);
5648
5649 mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5650 for (i = 0; i < rdev->usec_timeout; i++) {
5651 if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5652 break;
5653 udelay(1);
5654 }
5655
5656 for (i = 0; i < rdev->usec_timeout; i++) {
5657 if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5658 break;
5659 udelay(1);
5660 }
5661 }
5662
5663 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5664 {
5665 u32 tmp;
5666
5667 tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5668 WREG32(RLC_GPR_REG2, tmp);
5669 }
5670
5671 /**
5672 * cik_rlc_stop - stop the RLC ME
5673 *
5674 * @rdev: radeon_device pointer
5675 *
5676 * Halt the RLC ME (MicroEngine) (CIK).
5677 */
5678 static void cik_rlc_stop(struct radeon_device *rdev)
5679 {
5680 WREG32(RLC_CNTL, 0);
5681
5682 cik_enable_gui_idle_interrupt(rdev, false);
5683
5684 cik_wait_for_rlc_serdes(rdev);
5685 }
5686
5687 /**
5688 * cik_rlc_start - start the RLC ME
5689 *
5690 * @rdev: radeon_device pointer
5691 *
5692 * Unhalt the RLC ME (MicroEngine) (CIK).
5693 */
5694 static void cik_rlc_start(struct radeon_device *rdev)
5695 {
5696 WREG32(RLC_CNTL, RLC_ENABLE);
5697
5698 cik_enable_gui_idle_interrupt(rdev, true);
5699
5700 udelay(50);
5701 }
5702
5703 /**
5704 * cik_rlc_resume - setup the RLC hw
5705 *
5706 * @rdev: radeon_device pointer
5707 *
5708 * Initialize the RLC registers, load the ucode,
5709 * and start the RLC (CIK).
5710 * Returns 0 for success, -EINVAL if the ucode is not available.
5711 */
5712 static int cik_rlc_resume(struct radeon_device *rdev)
5713 {
5714 u32 i, size, tmp;
5715 const __be32 *fw_data;
5716
5717 if (!rdev->rlc_fw)
5718 return -EINVAL;
5719
5720 switch (rdev->family) {
5721 case CHIP_BONAIRE:
5722 default:
5723 size = BONAIRE_RLC_UCODE_SIZE;
5724 break;
5725 case CHIP_KAVERI:
5726 size = KV_RLC_UCODE_SIZE;
5727 break;
5728 case CHIP_KABINI:
5729 size = KB_RLC_UCODE_SIZE;
5730 break;
5731 }
5732
5733 cik_rlc_stop(rdev);
5734
5735 /* disable CG */
5736 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5737 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5738
5739 si_rlc_reset(rdev);
5740
5741 cik_init_pg(rdev);
5742
5743 cik_init_cg(rdev);
5744
5745 WREG32(RLC_LB_CNTR_INIT, 0);
5746 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5747
5748 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5749 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5750 WREG32(RLC_LB_PARAMS, 0x00600408);
5751 WREG32(RLC_LB_CNTL, 0x80000004);
5752
5753 WREG32(RLC_MC_CNTL, 0);
5754 WREG32(RLC_UCODE_CNTL, 0);
5755
5756 fw_data = (const __be32 *)rdev->rlc_fw->data;
5757 WREG32(RLC_GPM_UCODE_ADDR, 0);
5758 for (i = 0; i < size; i++)
5759 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5760 WREG32(RLC_GPM_UCODE_ADDR, 0);
5761
5762 /* XXX - find out what chips support lbpw */
5763 cik_enable_lbpw(rdev, false);
5764
5765 if (rdev->family == CHIP_BONAIRE)
5766 WREG32(RLC_DRIVER_DMA_STATUS, 0);
5767
5768 cik_rlc_start(rdev);
5769
5770 return 0;
5771 }
5772
5773 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5774 {
5775 u32 data, orig, tmp, tmp2;
5776
5777 orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5778
5779 cik_enable_gui_idle_interrupt(rdev, enable);
5780
5781 if (enable) {
5782 tmp = cik_halt_rlc(rdev);
5783
5784 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5785 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5786 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5787 tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5788 WREG32(RLC_SERDES_WR_CTRL, tmp2);
5789
5790 cik_update_rlc(rdev, tmp);
5791
5792 data |= CGCG_EN | CGLS_EN;
5793 } else {
5794 RREG32(CB_CGTT_SCLK_CTRL);
5795 RREG32(CB_CGTT_SCLK_CTRL);
5796 RREG32(CB_CGTT_SCLK_CTRL);
5797 RREG32(CB_CGTT_SCLK_CTRL);
5798
5799 data &= ~(CGCG_EN | CGLS_EN);
5800 }
5801
5802 if (orig != data)
5803 WREG32(RLC_CGCG_CGLS_CTRL, data);
5804
5805 }
5806
5807 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5808 {
5809 u32 data, orig, tmp = 0;
5810
5811 if (enable) {
5812 orig = data = RREG32(CP_MEM_SLP_CNTL);
5813 data |= CP_MEM_LS_EN;
5814 if (orig != data)
5815 WREG32(CP_MEM_SLP_CNTL, data);
5816
5817 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5818 data &= 0xfffffffd;
5819 if (orig != data)
5820 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5821
5822 tmp = cik_halt_rlc(rdev);
5823
5824 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5825 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5826 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5827 data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5828 WREG32(RLC_SERDES_WR_CTRL, data);
5829
5830 cik_update_rlc(rdev, tmp);
5831
5832 orig = data = RREG32(CGTS_SM_CTRL_REG);
5833 data &= ~SM_MODE_MASK;
5834 data |= SM_MODE(0x2);
5835 data |= SM_MODE_ENABLE;
5836 data &= ~CGTS_OVERRIDE;
5837 data &= ~CGTS_LS_OVERRIDE;
5838 data &= ~ON_MONITOR_ADD_MASK;
5839 data |= ON_MONITOR_ADD_EN;
5840 data |= ON_MONITOR_ADD(0x96);
5841 if (orig != data)
5842 WREG32(CGTS_SM_CTRL_REG, data);
5843 } else {
5844 orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5845 data |= 0x00000002;
5846 if (orig != data)
5847 WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5848
5849 data = RREG32(RLC_MEM_SLP_CNTL);
5850 if (data & RLC_MEM_LS_EN) {
5851 data &= ~RLC_MEM_LS_EN;
5852 WREG32(RLC_MEM_SLP_CNTL, data);
5853 }
5854
5855 data = RREG32(CP_MEM_SLP_CNTL);
5856 if (data & CP_MEM_LS_EN) {
5857 data &= ~CP_MEM_LS_EN;
5858 WREG32(CP_MEM_SLP_CNTL, data);
5859 }
5860
5861 orig = data = RREG32(CGTS_SM_CTRL_REG);
5862 data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5863 if (orig != data)
5864 WREG32(CGTS_SM_CTRL_REG, data);
5865
5866 tmp = cik_halt_rlc(rdev);
5867
5868 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5869 WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5870 WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5871 data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5872 WREG32(RLC_SERDES_WR_CTRL, data);
5873
5874 cik_update_rlc(rdev, tmp);
5875 }
5876 }
5877
5878 static const u32 mc_cg_registers[] =
5879 {
5880 MC_HUB_MISC_HUB_CG,
5881 MC_HUB_MISC_SIP_CG,
5882 MC_HUB_MISC_VM_CG,
5883 MC_XPB_CLK_GAT,
5884 ATC_MISC_CG,
5885 MC_CITF_MISC_WR_CG,
5886 MC_CITF_MISC_RD_CG,
5887 MC_CITF_MISC_VM_CG,
5888 VM_L2_CG,
5889 };
5890
5891 static void cik_enable_mc_ls(struct radeon_device *rdev,
5892 bool enable)
5893 {
5894 int i;
5895 u32 orig, data;
5896
5897 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5898 orig = data = RREG32(mc_cg_registers[i]);
5899 if (enable)
5900 data |= MC_LS_ENABLE;
5901 else
5902 data &= ~MC_LS_ENABLE;
5903 if (data != orig)
5904 WREG32(mc_cg_registers[i], data);
5905 }
5906 }
5907
5908 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5909 bool enable)
5910 {
5911 int i;
5912 u32 orig, data;
5913
5914 for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5915 orig = data = RREG32(mc_cg_registers[i]);
5916 if (enable)
5917 data |= MC_CG_ENABLE;
5918 else
5919 data &= ~MC_CG_ENABLE;
5920 if (data != orig)
5921 WREG32(mc_cg_registers[i], data);
5922 }
5923 }
5924
5925 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5926 bool enable)
5927 {
5928 u32 orig, data;
5929
5930 if (enable) {
5931 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5932 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5933 } else {
5934 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5935 data |= 0xff000000;
5936 if (data != orig)
5937 WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5938
5939 orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5940 data |= 0xff000000;
5941 if (data != orig)
5942 WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5943 }
5944 }
5945
5946 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5947 bool enable)
5948 {
5949 u32 orig, data;
5950
5951 if (enable) {
5952 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5953 data |= 0x100;
5954 if (orig != data)
5955 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5956
5957 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5958 data |= 0x100;
5959 if (orig != data)
5960 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5961 } else {
5962 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5963 data &= ~0x100;
5964 if (orig != data)
5965 WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5966
5967 orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5968 data &= ~0x100;
5969 if (orig != data)
5970 WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5971 }
5972 }
5973
5974 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5975 bool enable)
5976 {
5977 u32 orig, data;
5978
5979 if (enable) {
5980 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5981 data = 0xfff;
5982 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5983
5984 orig = data = RREG32(UVD_CGC_CTRL);
5985 data |= DCM;
5986 if (orig != data)
5987 WREG32(UVD_CGC_CTRL, data);
5988 } else {
5989 data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5990 data &= ~0xfff;
5991 WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5992
5993 orig = data = RREG32(UVD_CGC_CTRL);
5994 data &= ~DCM;
5995 if (orig != data)
5996 WREG32(UVD_CGC_CTRL, data);
5997 }
5998 }
5999
6000 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6001 bool enable)
6002 {
6003 u32 orig, data;
6004
6005 orig = data = RREG32(HDP_HOST_PATH_CNTL);
6006
6007 if (enable)
6008 data &= ~CLOCK_GATING_DIS;
6009 else
6010 data |= CLOCK_GATING_DIS;
6011
6012 if (orig != data)
6013 WREG32(HDP_HOST_PATH_CNTL, data);
6014 }
6015
6016 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6017 bool enable)
6018 {
6019 u32 orig, data;
6020
6021 orig = data = RREG32(HDP_MEM_POWER_LS);
6022
6023 if (enable)
6024 data |= HDP_LS_ENABLE;
6025 else
6026 data &= ~HDP_LS_ENABLE;
6027
6028 if (orig != data)
6029 WREG32(HDP_MEM_POWER_LS, data);
6030 }
6031
6032 void cik_update_cg(struct radeon_device *rdev,
6033 u32 block, bool enable)
6034 {
6035 if (block & RADEON_CG_BLOCK_GFX) {
6036 /* order matters! */
6037 if (enable) {
6038 cik_enable_mgcg(rdev, true);
6039 cik_enable_cgcg(rdev, true);
6040 } else {
6041 cik_enable_cgcg(rdev, false);
6042 cik_enable_mgcg(rdev, false);
6043 }
6044 }
6045
6046 if (block & RADEON_CG_BLOCK_MC) {
6047 if (!(rdev->flags & RADEON_IS_IGP)) {
6048 cik_enable_mc_mgcg(rdev, enable);
6049 cik_enable_mc_ls(rdev, enable);
6050 }
6051 }
6052
6053 if (block & RADEON_CG_BLOCK_SDMA) {
6054 cik_enable_sdma_mgcg(rdev, enable);
6055 cik_enable_sdma_mgls(rdev, enable);
6056 }
6057
6058 if (block & RADEON_CG_BLOCK_UVD) {
6059 if (rdev->has_uvd)
6060 cik_enable_uvd_mgcg(rdev, enable);
6061 }
6062
6063 if (block & RADEON_CG_BLOCK_HDP) {
6064 cik_enable_hdp_mgcg(rdev, enable);
6065 cik_enable_hdp_ls(rdev, enable);
6066 }
6067 }
6068
6069 static void cik_init_cg(struct radeon_device *rdev)
6070 {
6071
6072 cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false); /* XXX true */
6073
6074 if (rdev->has_uvd)
6075 si_init_uvd_internal_cg(rdev);
6076
6077 cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6078 RADEON_CG_BLOCK_SDMA |
6079 RADEON_CG_BLOCK_UVD |
6080 RADEON_CG_BLOCK_HDP), true);
6081 }
6082
6083 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6084 bool enable)
6085 {
6086 u32 data, orig;
6087
6088 orig = data = RREG32(RLC_PG_CNTL);
6089 if (enable)
6090 data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6091 else
6092 data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6093 if (orig != data)
6094 WREG32(RLC_PG_CNTL, data);
6095 }
6096
6097 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6098 bool enable)
6099 {
6100 u32 data, orig;
6101
6102 orig = data = RREG32(RLC_PG_CNTL);
6103 if (enable)
6104 data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6105 else
6106 data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6107 if (orig != data)
6108 WREG32(RLC_PG_CNTL, data);
6109 }
6110
6111 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6112 {
6113 u32 data, orig;
6114
6115 orig = data = RREG32(RLC_PG_CNTL);
6116 if (enable)
6117 data &= ~DISABLE_CP_PG;
6118 else
6119 data |= DISABLE_CP_PG;
6120 if (orig != data)
6121 WREG32(RLC_PG_CNTL, data);
6122 }
6123
6124 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6125 {
6126 u32 data, orig;
6127
6128 orig = data = RREG32(RLC_PG_CNTL);
6129 if (enable)
6130 data &= ~DISABLE_GDS_PG;
6131 else
6132 data |= DISABLE_GDS_PG;
6133 if (orig != data)
6134 WREG32(RLC_PG_CNTL, data);
6135 }
6136
6137 #define CP_ME_TABLE_SIZE 96
6138 #define CP_ME_TABLE_OFFSET 2048
6139 #define CP_MEC_TABLE_OFFSET 4096
6140
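/* The CP power-gating table written below lives in the RLC's cp_table
 * buffer: one CP_ME_TABLE_SIZE-dword (96-dword) block per micro engine,
 * copied out of the CE, PFP, ME and MEC ucode images at the offsets
 * defined above. KAVERI carries a fifth block, also taken from the MEC
 * ucode, presumably for its second MEC.
 */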
6141 void cik_init_cp_pg_table(struct radeon_device *rdev)
6142 {
6143 const __be32 *fw_data;
6144 volatile u32 *dst_ptr;
6145 int me, i, max_me = 4;
6146 u32 bo_offset = 0;
6147 u32 table_offset;
6148
6149 if (rdev->family == CHIP_KAVERI)
6150 max_me = 5;
6151
6152 if (rdev->rlc.cp_table_ptr == NULL)
6153 return;
6154
6155 /* write the cp table buffer */
6156 dst_ptr = rdev->rlc.cp_table_ptr;
6157 for (me = 0; me < max_me; me++) {
6158 if (me == 0) {
6159 fw_data = (const __be32 *)rdev->ce_fw->data;
6160 table_offset = CP_ME_TABLE_OFFSET;
6161 } else if (me == 1) {
6162 fw_data = (const __be32 *)rdev->pfp_fw->data;
6163 table_offset = CP_ME_TABLE_OFFSET;
6164 } else if (me == 2) {
6165 fw_data = (const __be32 *)rdev->me_fw->data;
6166 table_offset = CP_ME_TABLE_OFFSET;
6167 } else {
6168 fw_data = (const __be32 *)rdev->mec_fw->data;
6169 table_offset = CP_MEC_TABLE_OFFSET;
6170 }
6171
6172 for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
6173 dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
6174 }
6175 bo_offset += CP_ME_TABLE_SIZE;
6176 }
6177 }
6178
6179 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6180 bool enable)
6181 {
6182 u32 data, orig;
6183
6184 if (enable) {
6185 orig = data = RREG32(RLC_PG_CNTL);
6186 data |= GFX_PG_ENABLE;
6187 if (orig != data)
6188 WREG32(RLC_PG_CNTL, data);
6189
6190 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6191 data |= AUTO_PG_EN;
6192 if (orig != data)
6193 WREG32(RLC_AUTO_PG_CTRL, data);
6194 } else {
6195 orig = data = RREG32(RLC_PG_CNTL);
6196 data &= ~GFX_PG_ENABLE;
6197 if (orig != data)
6198 WREG32(RLC_PG_CNTL, data);
6199
6200 orig = data = RREG32(RLC_AUTO_PG_CTRL);
6201 data &= ~AUTO_PG_EN;
6202 if (orig != data)
6203 WREG32(RLC_AUTO_PG_CTRL, data);
6204
6205 data = RREG32(DB_RENDER_CONTROL);
6206 }
6207 }
6208
6209 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6210 {
6211 u32 mask = 0, tmp, tmp1;
6212 int i;
6213
6214 cik_select_se_sh(rdev, se, sh);
6215 tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6216 tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6217 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6218
6219 tmp &= 0xffff0000;
6220
6221 tmp |= tmp1;
6222 tmp >>= 16;
6223
6224 for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6225 mask <<= 1;
6226 mask |= 1;
6227 }
6228
6229 return (~tmp) & mask;
6230 }
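/* Worked example for cik_get_cu_active_bitmap() above, with hypothetical
 * register values: assume max_cu_per_sh = 8,
 * CC_GC_SHADER_ARRAY_CONFIG = 0x00030000 (CUs 0-1 fused off) and
 * GC_USER_SHADER_ARRAY_CONFIG = 0x00040000 (CU 2 disabled by the user).
 * After masking, merging and shifting, tmp is 0x0007, mask is 0xff, and
 * the function returns ~0x0007 & 0xff = 0xf8: CUs 3-7 are active.
 */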
6231
6232 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6233 {
6234 u32 i, j, k, active_cu_number = 0;
6235 u32 mask, counter, cu_bitmap;
6236 u32 tmp = 0;
6237
6238 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6239 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6240 mask = 1;
6241 cu_bitmap = 0;
6242 counter = 0;
6243 for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
6244 if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6245 if (counter < 2)
6246 cu_bitmap |= mask;
6247 counter++;
6248 }
6249 mask <<= 1;
6250 }
6251
6252 active_cu_number += counter;
6253 tmp |= (cu_bitmap << (i * 16 + j * 8));
6254 }
6255 }
6256
6257 WREG32(RLC_PG_AO_CU_MASK, tmp);
6258
6259 tmp = RREG32(RLC_MAX_PG_CU);
6260 tmp &= ~MAX_PU_CU_MASK;
6261 tmp |= MAX_PU_CU(active_cu_number);
6262 WREG32(RLC_MAX_PG_CU, tmp);
6263 }
6264
6265 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6266 bool enable)
6267 {
6268 u32 data, orig;
6269
6270 orig = data = RREG32(RLC_PG_CNTL);
6271 if (enable)
6272 data |= STATIC_PER_CU_PG_ENABLE;
6273 else
6274 data &= ~STATIC_PER_CU_PG_ENABLE;
6275 if (orig != data)
6276 WREG32(RLC_PG_CNTL, data);
6277 }
6278
6279 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6280 bool enable)
6281 {
6282 u32 data, orig;
6283
6284 orig = data = RREG32(RLC_PG_CNTL);
6285 if (enable)
6286 data |= DYN_PER_CU_PG_ENABLE;
6287 else
6288 data &= ~DYN_PER_CU_PG_ENABLE;
6289 if (orig != data)
6290 WREG32(RLC_PG_CNTL, data);
6291 }
6292
6293 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6294 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D
6295
6296 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6297 {
6298 u32 data, orig;
6299 u32 i;
6300
6301 if (rdev->rlc.cs_data) {
6302 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6303 WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6304 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_gpu_addr);
6305 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6306 } else {
6307 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6308 for (i = 0; i < 3; i++)
6309 WREG32(RLC_GPM_SCRATCH_DATA, 0);
6310 }
6311 if (rdev->rlc.reg_list) {
6312 WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6313 for (i = 0; i < rdev->rlc.reg_list_size; i++)
6314 WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6315 }
6316
6317 orig = data = RREG32(RLC_PG_CNTL);
6318 data |= GFX_PG_SRC;
6319 if (orig != data)
6320 WREG32(RLC_PG_CNTL, data);
6321
6322 WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6323 WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6324
6325 data = RREG32(CP_RB_WPTR_POLL_CNTL);
6326 data &= ~IDLE_POLL_COUNT_MASK;
6327 data |= IDLE_POLL_COUNT(0x60);
6328 WREG32(CP_RB_WPTR_POLL_CNTL, data);
6329
6330 data = 0x10101010;
6331 WREG32(RLC_PG_DELAY, data);
6332
6333 data = RREG32(RLC_PG_DELAY_2);
6334 data &= ~0xff;
6335 data |= 0x3;
6336 WREG32(RLC_PG_DELAY_2, data);
6337
6338 data = RREG32(RLC_AUTO_PG_CTRL);
6339 data &= ~GRBM_REG_SGIT_MASK;
6340 data |= GRBM_REG_SGIT(0x700);
6341 WREG32(RLC_AUTO_PG_CTRL, data);
6342
6343 }
6344
6345 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6346 {
6347 bool has_pg = false;
6348 bool has_dyn_mgpg = false;
6349 bool has_static_mgpg = false;
6350
6351 /* only APUs have PG */
6352 if (rdev->flags & RADEON_IS_IGP) {
6353 has_pg = true;
6354 has_static_mgpg = true;
6355 if (rdev->family == CHIP_KAVERI)
6356 has_dyn_mgpg = true;
6357 }
6358
6359 if (has_pg) {
6360 cik_enable_gfx_cgpg(rdev, enable);
6361 if (enable) {
6362 cik_enable_gfx_static_mgpg(rdev, has_static_mgpg);
6363 cik_enable_gfx_dynamic_mgpg(rdev, has_dyn_mgpg);
6364 } else {
6365 cik_enable_gfx_static_mgpg(rdev, false);
6366 cik_enable_gfx_dynamic_mgpg(rdev, false);
6367 }
6368 }
6369
6370 }
6371
6372 void cik_init_pg(struct radeon_device *rdev)
6373 {
6374 bool has_pg = false;
6375
6376 /* only APUs have PG */
6377 if (rdev->flags & RADEON_IS_IGP) {
6378 /* XXX disable this for now */
6379 /* has_pg = true; */
6380 }
6381
6382 if (has_pg) {
6383 cik_enable_sck_slowdown_on_pu(rdev, true);
6384 cik_enable_sck_slowdown_on_pd(rdev, true);
6385 cik_init_gfx_cgpg(rdev);
6386 cik_enable_cp_pg(rdev, true);
6387 cik_enable_gds_pg(rdev, true);
6388 cik_init_ao_cu_mask(rdev);
6389 cik_update_gfx_pg(rdev, true);
6390 }
6391 }
6392
6393 /*
6394 * Interrupts
6395 * Starting with r6xx, interrupts are handled via a ring buffer.
6396 * Ring buffers are areas of GPU accessible memory that the GPU
6397 * writes interrupt vectors into and the host reads vectors out of.
6398 * There is a rptr (read pointer) that determines where the
6399 * host is currently reading, and a wptr (write pointer)
6400 * which determines where the GPU has written. When the
6401 * pointers are equal, the ring is idle. When the GPU
6402 * writes vectors to the ring buffer, it increments the
6403 * wptr. When there is an interrupt, the host then starts
6404 * fetching vectors and processing them until the pointers are
6405 * equal again at which point it updates the rptr.
6406 */
6407
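/* Illustrative sketch, not driver code: the rptr/wptr arithmetic the
 * host uses to drain the IH ring. The names below are hypothetical;
 * the real loop lives in cik_irq_process() further down.
 */
static inline bool example_ih_ring_idle(u32 rptr, u32 wptr)
{
	/* the ring is idle when the host has caught up with the GPU */
	return rptr == wptr;
}

static inline u32 example_ih_advance_rptr(u32 rptr, u32 ptr_mask)
{
	/* each IV entry is 16 bytes; wrap using the ring's pointer mask */
	return (rptr + 16) & ptr_mask;
}
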
6408 /**
6409 * cik_enable_interrupts - Enable the interrupt ring buffer
6410 *
6411 * @rdev: radeon_device pointer
6412 *
6413 * Enable the interrupt ring buffer (CIK).
6414 */
6415 static void cik_enable_interrupts(struct radeon_device *rdev)
6416 {
6417 u32 ih_cntl = RREG32(IH_CNTL);
6418 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6419
6420 ih_cntl |= ENABLE_INTR;
6421 ih_rb_cntl |= IH_RB_ENABLE;
6422 WREG32(IH_CNTL, ih_cntl);
6423 WREG32(IH_RB_CNTL, ih_rb_cntl);
6424 rdev->ih.enabled = true;
6425 }
6426
6427 /**
6428 * cik_disable_interrupts - Disable the interrupt ring buffer
6429 *
6430 * @rdev: radeon_device pointer
6431 *
6432 * Disable the interrupt ring buffer (CIK).
6433 */
6434 static void cik_disable_interrupts(struct radeon_device *rdev)
6435 {
6436 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6437 u32 ih_cntl = RREG32(IH_CNTL);
6438
6439 ih_rb_cntl &= ~IH_RB_ENABLE;
6440 ih_cntl &= ~ENABLE_INTR;
6441 WREG32(IH_RB_CNTL, ih_rb_cntl);
6442 WREG32(IH_CNTL, ih_cntl);
6443 /* set rptr, wptr to 0 */
6444 WREG32(IH_RB_RPTR, 0);
6445 WREG32(IH_RB_WPTR, 0);
6446 rdev->ih.enabled = false;
6447 rdev->ih.rptr = 0;
6448 }
6449
6450 /**
6451 * cik_disable_interrupt_state - Disable all interrupt sources
6452 *
6453 * @rdev: radeon_device pointer
6454 *
6455 * Clear all interrupt enable bits used by the driver (CIK).
6456 */
6457 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6458 {
6459 u32 tmp;
6460
6461 /* gfx ring */
6462 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6463 /* sdma */
6464 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6465 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6466 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6467 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6468 /* compute queues */
6469 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6470 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6471 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6472 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6473 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6474 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6475 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6476 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6477 /* grbm */
6478 WREG32(GRBM_INT_CNTL, 0);
6479 /* vline/vblank, etc. */
6480 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6481 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6482 if (rdev->num_crtc >= 4) {
6483 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6484 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6485 }
6486 if (rdev->num_crtc >= 6) {
6487 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6488 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6489 }
6490
6491 /* dac hotplug */
6492 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6493
6494 /* digital hotplug */
6495 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6496 WREG32(DC_HPD1_INT_CONTROL, tmp);
6497 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6498 WREG32(DC_HPD2_INT_CONTROL, tmp);
6499 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6500 WREG32(DC_HPD3_INT_CONTROL, tmp);
6501 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6502 WREG32(DC_HPD4_INT_CONTROL, tmp);
6503 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6504 WREG32(DC_HPD5_INT_CONTROL, tmp);
6505 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6506 WREG32(DC_HPD6_INT_CONTROL, tmp);
6507
6508 }
6509
6510 /**
6511 * cik_irq_init - init and enable the interrupt ring
6512 *
6513 * @rdev: radeon_device pointer
6514 *
6515 * Allocate a ring buffer for the interrupt controller,
6516 * enable the RLC, disable interrupts, enable the IH
6517 * ring buffer and enable it (CIK).
6518 * Called at device load and resume.
6519 * Returns 0 for success, errors for failure.
6520 */
6521 static int cik_irq_init(struct radeon_device *rdev)
6522 {
6523 int ret = 0;
6524 int rb_bufsz;
6525 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6526
6527 /* allocate ring */
6528 ret = r600_ih_ring_alloc(rdev);
6529 if (ret)
6530 return ret;
6531
6532 /* disable irqs */
6533 cik_disable_interrupts(rdev);
6534
6535 /* init rlc */
6536 ret = cik_rlc_resume(rdev);
6537 if (ret) {
6538 r600_ih_ring_fini(rdev);
6539 return ret;
6540 }
6541
6542 /* setup interrupt control */
6543 /* XXX this should actually be a bus address, not an MC address. same on older asics */
6544 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6545 interrupt_cntl = RREG32(INTERRUPT_CNTL);
6546 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6547 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6548 */
6549 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6550 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6551 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6552 WREG32(INTERRUPT_CNTL, interrupt_cntl);
6553
6554 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6555 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
6556
6557 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6558 IH_WPTR_OVERFLOW_CLEAR |
6559 (rb_bufsz << 1));
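/* e.g., with the usual 64 KiB IH ring: ring_size / 4 = 16384 dwords,
 * drm_order(16384) = 14, and the size field of IH_RB_CNTL starts at
 * bit 1, hence the rb_bufsz << 1 above.
 */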
6560
6561 if (rdev->wb.enabled)
6562 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6563
6564 /* set the writeback address whether it's enabled or not */
6565 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6566 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6567
6568 WREG32(IH_RB_CNTL, ih_rb_cntl);
6569
6570 /* set rptr, wptr to 0 */
6571 WREG32(IH_RB_RPTR, 0);
6572 WREG32(IH_RB_WPTR, 0);
6573
6574 /* Default settings for IH_CNTL (disabled at first) */
6575 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6576 /* RPTR_REARM only works if msi's are enabled */
6577 if (rdev->msi_enabled)
6578 ih_cntl |= RPTR_REARM;
6579 WREG32(IH_CNTL, ih_cntl);
6580
6581 /* force the active interrupt state to all disabled */
6582 cik_disable_interrupt_state(rdev);
6583
6584 pci_set_master(rdev->pdev);
6585
6586 /* enable irqs */
6587 cik_enable_interrupts(rdev);
6588
6589 return ret;
6590 }
6591
6592 /**
6593 * cik_irq_set - enable/disable interrupt sources
6594 *
6595 * @rdev: radeon_device pointer
6596 *
6597 * Enable interrupt sources on the GPU (vblanks, hpd,
6598 * etc.) (CIK).
6599 * Returns 0 for success, errors for failure.
6600 */
6601 int cik_irq_set(struct radeon_device *rdev)
6602 {
6603 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6604 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6605 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6606 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6607 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6608 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6609 u32 grbm_int_cntl = 0;
6610 u32 dma_cntl, dma_cntl1;
6611 u32 thermal_int;
6612
6613 if (!rdev->irq.installed) {
6614 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6615 return -EINVAL;
6616 }
6617 /* don't enable anything if the ih is disabled */
6618 if (!rdev->ih.enabled) {
6619 cik_disable_interrupts(rdev);
6620 /* force the active interrupt state to all disabled */
6621 cik_disable_interrupt_state(rdev);
6622 return 0;
6623 }
6624
6625 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6626 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6627 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6628 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6629 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6630 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6631
6632 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6633 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6634
6635 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6636 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6637 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6638 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6639 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6640 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6641 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6642 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6643
6644 if (rdev->flags & RADEON_IS_IGP)
6645 thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6646 ~(THERM_INTH_MASK | THERM_INTL_MASK);
6647 else
6648 thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6649 ~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6650
6651 /* enable CP interrupts on all rings */
6652 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6653 DRM_DEBUG("cik_irq_set: sw int gfx\n");
6654 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6655 }
6656 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6657 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6658 DRM_DEBUG("si_irq_set: sw int cp1\n");
6659 if (ring->me == 1) {
6660 switch (ring->pipe) {
6661 case 0:
6662 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6663 break;
6664 case 1:
6665 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6666 break;
6667 case 2:
6668 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6669 break;
6670 case 3:
6671 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6672 break;
6673 default:
6674 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6675 break;
6676 }
6677 } else if (ring->me == 2) {
6678 switch (ring->pipe) {
6679 case 0:
6680 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6681 break;
6682 case 1:
6683 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6684 break;
6685 case 2:
6686 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6687 break;
6688 case 3:
6689 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6690 break;
6691 default:
6692 DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6693 break;
6694 }
6695 } else {
6696 DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6697 }
6698 }
6699 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6700 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6701 DRM_DEBUG("si_irq_set: sw int cp2\n");
6702 if (ring->me == 1) {
6703 switch (ring->pipe) {
6704 case 0:
6705 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6706 break;
6707 case 1:
6708 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6709 break;
6710 case 2:
6711 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6712 break;
6713 case 3:
6714 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6715 break;
6716 default:
6717 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6718 break;
6719 }
6720 } else if (ring->me == 2) {
6721 switch (ring->pipe) {
6722 case 0:
6723 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6724 break;
6725 case 1:
6726 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6727 break;
6728 case 2:
6729 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6730 break;
6731 case 3:
6732 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6733 break;
6734 default:
6735 DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6736 break;
6737 }
6738 } else {
6739 DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6740 }
6741 }
6742
6743 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6744 DRM_DEBUG("cik_irq_set: sw int dma\n");
6745 dma_cntl |= TRAP_ENABLE;
6746 }
6747
6748 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6749 DRM_DEBUG("cik_irq_set: sw int dma1\n");
6750 dma_cntl1 |= TRAP_ENABLE;
6751 }
6752
6753 if (rdev->irq.crtc_vblank_int[0] ||
6754 atomic_read(&rdev->irq.pflip[0])) {
6755 DRM_DEBUG("cik_irq_set: vblank 0\n");
6756 crtc1 |= VBLANK_INTERRUPT_MASK;
6757 }
6758 if (rdev->irq.crtc_vblank_int[1] ||
6759 atomic_read(&rdev->irq.pflip[1])) {
6760 DRM_DEBUG("cik_irq_set: vblank 1\n");
6761 crtc2 |= VBLANK_INTERRUPT_MASK;
6762 }
6763 if (rdev->irq.crtc_vblank_int[2] ||
6764 atomic_read(&rdev->irq.pflip[2])) {
6765 DRM_DEBUG("cik_irq_set: vblank 2\n");
6766 crtc3 |= VBLANK_INTERRUPT_MASK;
6767 }
6768 if (rdev->irq.crtc_vblank_int[3] ||
6769 atomic_read(&rdev->irq.pflip[3])) {
6770 DRM_DEBUG("cik_irq_set: vblank 3\n");
6771 crtc4 |= VBLANK_INTERRUPT_MASK;
6772 }
6773 if (rdev->irq.crtc_vblank_int[4] ||
6774 atomic_read(&rdev->irq.pflip[4])) {
6775 DRM_DEBUG("cik_irq_set: vblank 4\n");
6776 crtc5 |= VBLANK_INTERRUPT_MASK;
6777 }
6778 if (rdev->irq.crtc_vblank_int[5] ||
6779 atomic_read(&rdev->irq.pflip[5])) {
6780 DRM_DEBUG("cik_irq_set: vblank 5\n");
6781 crtc6 |= VBLANK_INTERRUPT_MASK;
6782 }
6783 if (rdev->irq.hpd[0]) {
6784 DRM_DEBUG("cik_irq_set: hpd 1\n");
6785 hpd1 |= DC_HPDx_INT_EN;
6786 }
6787 if (rdev->irq.hpd[1]) {
6788 DRM_DEBUG("cik_irq_set: hpd 2\n");
6789 hpd2 |= DC_HPDx_INT_EN;
6790 }
6791 if (rdev->irq.hpd[2]) {
6792 DRM_DEBUG("cik_irq_set: hpd 3\n");
6793 hpd3 |= DC_HPDx_INT_EN;
6794 }
6795 if (rdev->irq.hpd[3]) {
6796 DRM_DEBUG("cik_irq_set: hpd 4\n");
6797 hpd4 |= DC_HPDx_INT_EN;
6798 }
6799 if (rdev->irq.hpd[4]) {
6800 DRM_DEBUG("cik_irq_set: hpd 5\n");
6801 hpd5 |= DC_HPDx_INT_EN;
6802 }
6803 if (rdev->irq.hpd[5]) {
6804 DRM_DEBUG("cik_irq_set: hpd 6\n");
6805 hpd6 |= DC_HPDx_INT_EN;
6806 }
6807
6808 if (rdev->irq.dpm_thermal) {
6809 DRM_DEBUG("dpm thermal\n");
6810 if (rdev->flags & RADEON_IS_IGP)
6811 thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6812 else
6813 thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6814 }
6815
6816 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6817
6818 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6819 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6820
6821 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6822 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6823 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6824 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6825 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6826 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6827 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6828 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6829
6830 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6831
6832 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6833 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6834 if (rdev->num_crtc >= 4) {
6835 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6836 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6837 }
6838 if (rdev->num_crtc >= 6) {
6839 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6840 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6841 }
6842
6843 WREG32(DC_HPD1_INT_CONTROL, hpd1);
6844 WREG32(DC_HPD2_INT_CONTROL, hpd2);
6845 WREG32(DC_HPD3_INT_CONTROL, hpd3);
6846 WREG32(DC_HPD4_INT_CONTROL, hpd4);
6847 WREG32(DC_HPD5_INT_CONTROL, hpd5);
6848 WREG32(DC_HPD6_INT_CONTROL, hpd6);
6849
6850 if (rdev->flags & RADEON_IS_IGP)
6851 WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6852 else
6853 WREG32_SMC(CG_THERMAL_INT, thermal_int);
6854
6855 return 0;
6856 }
6857
6858 /**
6859 * cik_irq_ack - ack interrupt sources
6860 *
6861 * @rdev: radeon_device pointer
6862 *
6863 * Ack interrupt sources on the GPU (vblanks, hpd,
6864 * etc.) (CIK). Certain interrupt sources are sw
6865 * generated and do not require an explicit ack.
6866 */
6867 static inline void cik_irq_ack(struct radeon_device *rdev)
6868 {
6869 u32 tmp;
6870
6871 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6872 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6873 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6874 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6875 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6876 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6877 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6878
6879 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6880 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6881 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6882 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6883 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6884 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6885 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6886 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6887
6888 if (rdev->num_crtc >= 4) {
6889 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6890 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6891 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6892 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6893 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6894 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6895 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6896 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6897 }
6898
6899 if (rdev->num_crtc >= 6) {
6900 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6901 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6902 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6903 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6904 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6905 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6906 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6907 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6908 }
6909
6910 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6911 tmp = RREG32(DC_HPD1_INT_CONTROL);
6912 tmp |= DC_HPDx_INT_ACK;
6913 WREG32(DC_HPD1_INT_CONTROL, tmp);
6914 }
6915 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6916 tmp = RREG32(DC_HPD2_INT_CONTROL);
6917 tmp |= DC_HPDx_INT_ACK;
6918 WREG32(DC_HPD2_INT_CONTROL, tmp);
6919 }
6920 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6921 tmp = RREG32(DC_HPD3_INT_CONTROL);
6922 tmp |= DC_HPDx_INT_ACK;
6923 WREG32(DC_HPD3_INT_CONTROL, tmp);
6924 }
6925 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6926 tmp = RREG32(DC_HPD4_INT_CONTROL);
6927 tmp |= DC_HPDx_INT_ACK;
6928 WREG32(DC_HPD4_INT_CONTROL, tmp);
6929 }
6930 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6931 tmp = RREG32(DC_HPD5_INT_CONTROL);
6932 tmp |= DC_HPDx_INT_ACK;
6933 WREG32(DC_HPD5_INT_CONTROL, tmp);
6934 }
6935 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6936 tmp = RREG32(DC_HPD6_INT_CONTROL);
6937 tmp |= DC_HPDx_INT_ACK;
6938 WREG32(DC_HPD6_INT_CONTROL, tmp);
6939 }
6940 }
6941
6942 /**
6943 * cik_irq_disable - disable interrupts
6944 *
6945 * @rdev: radeon_device pointer
6946 *
6947 * Disable interrupts on the hw (CIK).
6948 */
6949 static void cik_irq_disable(struct radeon_device *rdev)
6950 {
6951 cik_disable_interrupts(rdev);
6952 /* Wait and acknowledge irq */
6953 mdelay(1);
6954 cik_irq_ack(rdev);
6955 cik_disable_interrupt_state(rdev);
6956 }
6957
6958 /**
6959 * cik_irq_suspend - disable interrupts for suspend
6960 *
6961 * @rdev: radeon_device pointer
6962 *
6963 * Disable interrupts and stop the RLC (CIK).
6964 * Used for suspend.
6965 */
6966 static void cik_irq_suspend(struct radeon_device *rdev)
6967 {
6968 cik_irq_disable(rdev);
6969 cik_rlc_stop(rdev);
6970 }
6971
6972 /**
6973 * cik_irq_fini - tear down interrupt support
6974 *
6975 * @rdev: radeon_device pointer
6976 *
6977 * Disable interrupts on the hw and free the IH ring
6978 * buffer (CIK).
6979 * Used for driver unload.
6980 */
6981 static void cik_irq_fini(struct radeon_device *rdev)
6982 {
6983 cik_irq_suspend(rdev);
6984 r600_ih_ring_fini(rdev);
6985 }
6986
6987 /**
6988 * cik_get_ih_wptr - get the IH ring buffer wptr
6989 *
6990 * @rdev: radeon_device pointer
6991 *
6992 * Get the IH ring buffer wptr from either the register
6993 * or the writeback memory buffer (CIK). Also check for
6994 * ring buffer overflow and deal with it.
6995 * Used by cik_irq_process().
6996 * Returns the value of the wptr.
6997 */
6998 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6999 {
7000 u32 wptr, tmp;
7001
7002 if (rdev->wb.enabled)
7003 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7004 else
7005 wptr = RREG32(IH_RB_WPTR);
7006
7007 if (wptr & RB_OVERFLOW) {
7008 /* When a ring buffer overflow happens, start parsing interrupts
7009 * from the last vector that was not overwritten (wptr + 16).
7010 * Hopefully this should allow us to catch up.
7011 */
7012 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7013 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7014 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7015 tmp = RREG32(IH_RB_CNTL);
7016 tmp |= IH_WPTR_OVERFLOW_CLEAR;
7017 WREG32(IH_RB_CNTL, tmp);
7018 }
7019 return (wptr & rdev->ih.ptr_mask);
7020 }
7021
7022 /* CIK IV Ring
7023 * Each IV ring entry is 128 bits:
7024 * [7:0] - interrupt source id
7025 * [31:8] - reserved
7026 * [59:32] - interrupt source data
7027 * [63:60] - reserved
7028 * [71:64] - RINGID
7029 * CP:
7030 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7031 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7032 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7033 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7034 * PIPE_ID - ME0 0=3D
7035 * - ME1&2 compute dispatcher (4 pipes each)
7036 * SDMA:
7037 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
7038 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
7039 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7040 * [79:72] - VMID
7041 * [95:80] - PASID
7042 * [127:96] - reserved
7043 */
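/* Illustrative sketch, not driver code: decoding one 128-bit IV entry
 * from its four little-endian dwords, following the layout above. The
 * struct and function names are hypothetical; cik_irq_process() below
 * performs the same extraction inline for the fields it needs.
 */
struct example_cik_iv_entry {
	u8 src_id;	/* dw0 bits [7:0] */
	u32 src_data;	/* dw1 bits [27:0] ([59:32] of the entry) */
	u8 ring_id;	/* dw2 bits [7:0]  ([71:64]) */
	u8 vmid;	/* dw2 bits [15:8] ([79:72]) */
	u16 pasid;	/* dw2 bits [31:16] ([95:80]) */
};

static void example_decode_iv(const volatile u32 *ring, u32 ring_index,
			      struct example_cik_iv_entry *e)
{
	u32 dw0 = le32_to_cpu(ring[ring_index + 0]);
	u32 dw1 = le32_to_cpu(ring[ring_index + 1]);
	u32 dw2 = le32_to_cpu(ring[ring_index + 2]);
	/* dw3 ([127:96]) is reserved */

	e->src_id = dw0 & 0xff;
	e->src_data = dw1 & 0xfffffff;
	e->ring_id = dw2 & 0xff;
	e->vmid = (dw2 >> 8) & 0xff;
	e->pasid = (dw2 >> 16) & 0xffff;
}
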
7044 /**
7045 * cik_irq_process - interrupt handler
7046 *
7047 * @rdev: radeon_device pointer
7048 *
7049 * Interrupt handler (CIK). Walk the IH ring,
7050 * ack interrupts and schedule work to handle
7051 * interrupt events.
7052 * Returns irq process return code.
7053 */
7054 int cik_irq_process(struct radeon_device *rdev)
7055 {
7056 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7057 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7058 u32 wptr;
7059 u32 rptr;
7060 u32 src_id, src_data, ring_id;
7061 u8 me_id, pipe_id, queue_id;
7062 u32 ring_index;
7063 bool queue_hotplug = false;
7064 bool queue_reset = false;
7065 u32 addr, status, mc_client;
7066 bool queue_thermal = false;
7067
7068 if (!rdev->ih.enabled || rdev->shutdown)
7069 return IRQ_NONE;
7070
7071 wptr = cik_get_ih_wptr(rdev);
7072
7073 restart_ih:
7074 /* is somebody else already processing irqs? */
7075 if (atomic_xchg(&rdev->ih.lock, 1))
7076 return IRQ_NONE;
7077
7078 rptr = rdev->ih.rptr;
7079 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7080
7081 /* Order reading of wptr vs. reading of IH ring data */
7082 rmb();
7083
7084 /* display interrupts */
7085 cik_irq_ack(rdev);
7086
7087 while (rptr != wptr) {
7088 /* wptr/rptr are in bytes! */
7089 ring_index = rptr / 4;
7090 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7091 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7092 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7093
7094 switch (src_id) {
7095 case 1: /* D1 vblank/vline */
7096 switch (src_data) {
7097 case 0: /* D1 vblank */
7098 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7099 if (rdev->irq.crtc_vblank_int[0]) {
7100 drm_handle_vblank(rdev->ddev, 0);
7101 rdev->pm.vblank_sync = true;
7102 wake_up(&rdev->irq.vblank_queue);
7103 }
7104 if (atomic_read(&rdev->irq.pflip[0]))
7105 radeon_crtc_handle_flip(rdev, 0);
7106 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7107 DRM_DEBUG("IH: D1 vblank\n");
7108 }
7109 break;
7110 case 1: /* D1 vline */
7111 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7112 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7113 DRM_DEBUG("IH: D1 vline\n");
7114 }
7115 break;
7116 default:
7117 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7118 break;
7119 }
7120 break;
7121 case 2: /* D2 vblank/vline */
7122 switch (src_data) {
7123 case 0: /* D2 vblank */
7124 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7125 if (rdev->irq.crtc_vblank_int[1]) {
7126 drm_handle_vblank(rdev->ddev, 1);
7127 rdev->pm.vblank_sync = true;
7128 wake_up(&rdev->irq.vblank_queue);
7129 }
7130 if (atomic_read(&rdev->irq.pflip[1]))
7131 radeon_crtc_handle_flip(rdev, 1);
7132 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7133 DRM_DEBUG("IH: D2 vblank\n");
7134 }
7135 break;
7136 case 1: /* D2 vline */
7137 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7138 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7139 DRM_DEBUG("IH: D2 vline\n");
7140 }
7141 break;
7142 default:
7143 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7144 break;
7145 }
7146 break;
7147 case 3: /* D3 vblank/vline */
7148 switch (src_data) {
7149 case 0: /* D3 vblank */
7150 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7151 if (rdev->irq.crtc_vblank_int[2]) {
7152 drm_handle_vblank(rdev->ddev, 2);
7153 rdev->pm.vblank_sync = true;
7154 wake_up(&rdev->irq.vblank_queue);
7155 }
7156 if (atomic_read(&rdev->irq.pflip[2]))
7157 radeon_crtc_handle_flip(rdev, 2);
7158 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7159 DRM_DEBUG("IH: D3 vblank\n");
7160 }
7161 break;
7162 case 1: /* D3 vline */
7163 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7164 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7165 DRM_DEBUG("IH: D3 vline\n");
7166 }
7167 break;
7168 default:
7169 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7170 break;
7171 }
7172 break;
7173 case 4: /* D4 vblank/vline */
7174 switch (src_data) {
7175 case 0: /* D4 vblank */
7176 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7177 if (rdev->irq.crtc_vblank_int[3]) {
7178 drm_handle_vblank(rdev->ddev, 3);
7179 rdev->pm.vblank_sync = true;
7180 wake_up(&rdev->irq.vblank_queue);
7181 }
7182 if (atomic_read(&rdev->irq.pflip[3]))
7183 radeon_crtc_handle_flip(rdev, 3);
7184 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7185 DRM_DEBUG("IH: D4 vblank\n");
7186 }
7187 break;
7188 case 1: /* D4 vline */
7189 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7190 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7191 DRM_DEBUG("IH: D4 vline\n");
7192 }
7193 break;
7194 default:
7195 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7196 break;
7197 }
7198 break;
7199 case 5: /* D5 vblank/vline */
7200 switch (src_data) {
7201 case 0: /* D5 vblank */
7202 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7203 if (rdev->irq.crtc_vblank_int[4]) {
7204 drm_handle_vblank(rdev->ddev, 4);
7205 rdev->pm.vblank_sync = true;
7206 wake_up(&rdev->irq.vblank_queue);
7207 }
7208 if (atomic_read(&rdev->irq.pflip[4]))
7209 radeon_crtc_handle_flip(rdev, 4);
7210 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7211 DRM_DEBUG("IH: D5 vblank\n");
7212 }
7213 break;
7214 case 1: /* D5 vline */
7215 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7216 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7217 DRM_DEBUG("IH: D5 vline\n");
7218 }
7219 break;
7220 default:
7221 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7222 break;
7223 }
7224 break;
7225 case 6: /* D6 vblank/vline */
7226 switch (src_data) {
7227 case 0: /* D6 vblank */
7228 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7229 if (rdev->irq.crtc_vblank_int[5]) {
7230 drm_handle_vblank(rdev->ddev, 5);
7231 rdev->pm.vblank_sync = true;
7232 wake_up(&rdev->irq.vblank_queue);
7233 }
7234 if (atomic_read(&rdev->irq.pflip[5]))
7235 radeon_crtc_handle_flip(rdev, 5);
7236 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7237 DRM_DEBUG("IH: D6 vblank\n");
7238 }
7239 break;
7240 case 1: /* D6 vline */
7241 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7242 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7243 DRM_DEBUG("IH: D6 vline\n");
7244 }
7245 break;
7246 default:
7247 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7248 break;
7249 }
7250 break;
7251 case 42: /* HPD hotplug */
7252 switch (src_data) {
7253 case 0:
7254 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7255 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7256 queue_hotplug = true;
7257 DRM_DEBUG("IH: HPD1\n");
7258 }
7259 break;
7260 case 1:
7261 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7262 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7263 queue_hotplug = true;
7264 DRM_DEBUG("IH: HPD2\n");
7265 }
7266 break;
7267 case 2:
7268 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7269 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7270 queue_hotplug = true;
7271 DRM_DEBUG("IH: HPD3\n");
7272 }
7273 break;
7274 case 3:
7275 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7276 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7277 queue_hotplug = true;
7278 DRM_DEBUG("IH: HPD4\n");
7279 }
7280 break;
7281 case 4:
7282 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7283 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7284 queue_hotplug = true;
7285 DRM_DEBUG("IH: HPD5\n");
7286 }
7287 break;
7288 case 5:
7289 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7290 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7291 queue_hotplug = true;
7292 DRM_DEBUG("IH: HPD6\n");
7293 }
7294 break;
7295 default:
7296 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7297 break;
7298 }
7299 break;
7300 case 146:
7301 case 147:
7302 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7303 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7304 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7305 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7306 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
7307 addr);
7308 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7309 status);
7310 cik_vm_decode_fault(rdev, status, addr, mc_client);
7311 /* reset addr and status */
7312 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7313 break;
7314 case 176: /* GFX RB CP_INT */
7315 case 177: /* GFX IB CP_INT */
7316 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7317 break;
7318 case 181: /* CP EOP event */
7319 DRM_DEBUG("IH: CP EOP\n");
7320 /* XXX check the bitfield order! */
7321 me_id = (ring_id & 0x60) >> 5;
7322 pipe_id = (ring_id & 0x18) >> 3;
7323 queue_id = (ring_id & 0x7) >> 0;
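/* The exact ring_id layout is unconfirmed (hence the XXX above); the
 * masks assume bits [6:5] = ME, [4:3] = pipe, [2:0] = queue. For
 * example, a ring_id of 0x28 would decode as me_id 1, pipe_id 1,
 * queue_id 0.
 */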
7324 switch (me_id) {
7325 case 0:
7326 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7327 break;
7328 case 1:
7329 case 2:
7330 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
7331 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7332 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
7333 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7334 break;
7335 }
7336 break;
7337 case 184: /* CP Privileged reg access */
7338 DRM_ERROR("Illegal register access in command stream\n");
7339 /* XXX check the bitfield order! */
7340 me_id = (ring_id & 0x60) >> 5;
7341 pipe_id = (ring_id & 0x18) >> 3;
7342 queue_id = (ring_id & 0x7) >> 0;
7343 switch (me_id) {
7344 case 0:
7345 /* This results in a full GPU reset, but all we need to do is soft
7346 * reset the CP for gfx
7347 */
7348 queue_reset = true;
7349 break;
7350 case 1:
7351 /* XXX compute */
7352 queue_reset = true;
7353 break;
7354 case 2:
7355 /* XXX compute */
7356 queue_reset = true;
7357 break;
7358 }
7359 break;
7360 case 185: /* CP Privileged inst */
7361 DRM_ERROR("Illegal instruction in command stream\n");
7362 /* XXX check the bitfield order! */
7363 me_id = (ring_id & 0x60) >> 5;
7364 pipe_id = (ring_id & 0x18) >> 3;
7365 queue_id = (ring_id & 0x7) >> 0;
7366 switch (me_id) {
7367 case 0:
7368 /* This results in a full GPU reset, but all we need to do is soft
7369 * reset the CP for gfx
7370 */
7371 queue_reset = true;
7372 break;
7373 case 1:
7374 /* XXX compute */
7375 queue_reset = true;
7376 break;
7377 case 2:
7378 /* XXX compute */
7379 queue_reset = true;
7380 break;
7381 }
7382 break;
7383 case 224: /* SDMA trap event */
7384 /* XXX check the bitfield order! */
7385 me_id = (ring_id & 0x3) >> 0;
7386 queue_id = (ring_id & 0xc) >> 2;
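/* As above, the layout is unconfirmed; the masks assume bits
 * [1:0] = SDMA engine and [3:2] = queue within that engine.
 */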
7387 DRM_DEBUG("IH: SDMA trap\n");
7388 switch (me_id) {
7389 case 0:
7390 switch (queue_id) {
7391 case 0:
7392 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7393 break;
7394 case 1:
7395 /* XXX compute */
7396 break;
7397 case 2:
7398 /* XXX compute */
7399 break;
7400 }
7401 break;
7402 case 1:
7403 switch (queue_id) {
7404 case 0:
7405 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7406 break;
7407 case 1:
7408 /* XXX compute */
7409 break;
7410 case 2:
7411 /* XXX compute */
7412 break;
7413 }
7414 break;
7415 }
7416 break;
7417 case 230: /* thermal low to high */
7418 DRM_DEBUG("IH: thermal low to high\n");
7419 rdev->pm.dpm.thermal.high_to_low = false;
7420 queue_thermal = true;
7421 break;
7422 case 231: /* thermal high to low */
7423 DRM_DEBUG("IH: thermal high to low\n");
7424 rdev->pm.dpm.thermal.high_to_low = true;
7425 queue_thermal = true;
7426 break;
7427 case 233: /* GUI IDLE */
7428 DRM_DEBUG("IH: GUI idle\n");
7429 break;
7430 case 241: /* SDMA Privileged inst */
7431 case 247: /* SDMA Privileged inst */
7432 DRM_ERROR("Illegal instruction in SDMA command stream\n");
7433 /* XXX check the bitfield order! */
7434 me_id = (ring_id & 0x3) >> 0;
7435 queue_id = (ring_id & 0xc) >> 2;
7436 switch (me_id) {
7437 case 0:
7438 switch (queue_id) {
7439 case 0:
7440 queue_reset = true;
7441 break;
7442 case 1:
7443 /* XXX compute */
7444 queue_reset = true;
7445 break;
7446 case 2:
7447 /* XXX compute */
7448 queue_reset = true;
7449 break;
7450 }
7451 break;
7452 case 1:
7453 switch (queue_id) {
7454 case 0:
7455 queue_reset = true;
7456 break;
7457 case 1:
7458 /* XXX compute */
7459 queue_reset = true;
7460 break;
7461 case 2:
7462 /* XXX compute */
7463 queue_reset = true;
7464 break;
7465 }
7466 break;
7467 }
7468 break;
7469 default:
7470 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7471 break;
7472 }
7473
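/* each IH vector is 128 bits (four dwords), hence the 16 byte step below */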
7474 /* wptr/rptr are in bytes! */
7475 rptr += 16;
7476 rptr &= rdev->ih.ptr_mask;
7477 }
7478 if (queue_hotplug)
7479 schedule_work(&rdev->hotplug_work);
7480 if (queue_reset)
7481 schedule_work(&rdev->reset_work);
7482 if (queue_thermal)
7483 schedule_work(&rdev->pm.dpm.thermal.work);
7484 rdev->ih.rptr = rptr;
7485 WREG32(IH_RB_RPTR, rdev->ih.rptr);
7486 atomic_set(&rdev->ih.lock, 0);
7487
7488 /* make sure wptr hasn't changed while processing */
7489 wptr = cik_get_ih_wptr(rdev);
7490 if (wptr != rptr)
7491 goto restart_ih;
7492
7493 return IRQ_HANDLED;
7494 }
7495
7496 /*
7497 * startup/shutdown callbacks
7498 */
7499 /**
7500 * cik_startup - program the asic to a functional state
7501 *
7502 * @rdev: radeon_device pointer
7503 *
7504 * Programs the asic to a functional state (CIK).
7505 * Called by cik_init() and cik_resume().
7506 * Returns 0 for success, error for failure.
7507 */
7508 static int cik_startup(struct radeon_device *rdev)
7509 {
7510 struct radeon_ring *ring;
7511 int r;
7512
7513 /* enable pcie gen2/3 link */
7514 cik_pcie_gen3_enable(rdev);
7515 /* enable aspm */
7516 cik_program_aspm(rdev);
7517
7518 cik_mc_program(rdev);
7519
7520 if (rdev->flags & RADEON_IS_IGP) {
7521 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7522 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
7523 r = cik_init_microcode(rdev);
7524 if (r) {
7525 DRM_ERROR("Failed to load firmware!\n");
7526 return r;
7527 }
7528 }
7529 } else {
7530 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7531 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
7532 !rdev->mc_fw) {
7533 r = cik_init_microcode(rdev);
7534 if (r) {
7535 DRM_ERROR("Failed to load firmware!\n");
7536 return r;
7537 }
7538 }
7539
7540 r = ci_mc_load_microcode(rdev);
7541 if (r) {
7542 DRM_ERROR("Failed to load MC firmware!\n");
7543 return r;
7544 }
7545 }
7546
7547 r = r600_vram_scratch_init(rdev);
7548 if (r)
7549 return r;
7550
7551 r = cik_pcie_gart_enable(rdev);
7552 if (r)
7553 return r;
7554 cik_gpu_init(rdev);
7555
7556 /* allocate rlc buffers */
7557 if (rdev->flags & RADEON_IS_IGP) {
7558 if (rdev->family == CHIP_KAVERI) {
7559 rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7560 rdev->rlc.reg_list_size =
7561 (u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7562 } else {
7563 rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7564 rdev->rlc.reg_list_size =
7565 (u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7566 }
7567 }
7568 rdev->rlc.cs_data = ci_cs_data;
7569 rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7570 r = sumo_rlc_init(rdev);
7571 if (r) {
7572 DRM_ERROR("Failed to init rlc BOs!\n");
7573 return r;
7574 }
7575
7576 /* allocate wb buffer */
7577 r = radeon_wb_init(rdev);
7578 if (r)
7579 return r;
7580
7581 /* allocate mec buffers */
7582 r = cik_mec_init(rdev);
7583 if (r) {
7584 DRM_ERROR("Failed to init MEC BOs!\n");
7585 return r;
7586 }
7587
7588 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7589 if (r) {
7590 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7591 return r;
7592 }
7593
7594 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7595 if (r) {
7596 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7597 return r;
7598 }
7599
7600 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7601 if (r) {
7602 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7603 return r;
7604 }
7605
7606 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7607 if (r) {
7608 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7609 return r;
7610 }
7611
7612 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7613 if (r) {
7614 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7615 return r;
7616 }
7617
7618 r = uvd_v4_2_resume(rdev);
7619 if (!r) {
7620 r = radeon_fence_driver_start_ring(rdev,
7621 R600_RING_TYPE_UVD_INDEX);
7622 if (r)
7623 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7624 }
7625 if (r)
7626 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
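/* Note that the UVD failure handling above is intentionally non-fatal:
 * with ring_size zeroed, the UVD ring is simply skipped later in
 * startup and the rest of the ASIC comes up without it.
 */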
7627
7628 /* Enable IRQ */
7629 if (!rdev->irq.installed) {
7630 r = radeon_irq_kms_init(rdev);
7631 if (r)
7632 return r;
7633 }
7634
7635 r = cik_irq_init(rdev);
7636 if (r) {
7637 DRM_ERROR("radeon: IH init failed (%d).\n", r);
7638 radeon_irq_kms_fini(rdev);
7639 return r;
7640 }
7641 cik_irq_set(rdev);
7642
7643 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7644 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7645 CP_RB0_RPTR, CP_RB0_WPTR,
7646 RADEON_CP_PACKET2);
7647 if (r)
7648 return r;
7649
7650 /* set up the compute queues */
7651 /* type-2 packets are deprecated on MEC, use type-3 instead */
7652 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7653 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7654 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7655 PACKET3(PACKET3_NOP, 0x3FFF));
7656 if (r)
7657 return r;
7658 ring->me = 1; /* first MEC */
7659 ring->pipe = 0; /* first pipe */
7660 ring->queue = 0; /* first queue */
7661 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7662
7663 /* type-2 packets are deprecated on MEC, use type-3 instead */
7664 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7665 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7666 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7667 PACKET3(PACKET3_NOP, 0x3FFF));
7668 if (r)
7669 return r;
7670 /* dGPU only have 1 MEC */
7671 ring->me = 1; /* first MEC */
7672 ring->pipe = 0; /* first pipe */
7673 ring->queue = 1; /* second queue */
7674 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7675
7676 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7677 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7678 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7679 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7680 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7681 if (r)
7682 return r;
7683
7684 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7685 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7686 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7687 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7688 SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7689 if (r)
7690 return r;
7691
7692 r = cik_cp_resume(rdev);
7693 if (r)
7694 return r;
7695
7696 r = cik_sdma_resume(rdev);
7697 if (r)
7698 return r;
7699
7700 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7701 if (ring->ring_size) {
7702 r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7703 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7704 RADEON_CP_PACKET2);
7705 if (!r)
7706 r = uvd_v1_0_init(rdev);
7707 if (r)
7708 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7709 }
7710
7711 r = radeon_ib_pool_init(rdev);
7712 if (r) {
7713 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7714 return r;
7715 }
7716
7717 r = radeon_vm_manager_init(rdev);
7718 if (r) {
7719 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7720 return r;
7721 }
7722
7723 return 0;
7724 }
7725
7726 /**
7727 * cik_resume - resume the asic to a functional state
7728 *
7729 * @rdev: radeon_device pointer
7730 *
7731 * Programs the asic to a functional state (CIK).
7732 * Called at resume.
7733 * Returns 0 for success, error for failure.
7734 */
7735 int cik_resume(struct radeon_device *rdev)
7736 {
7737 int r;
7738
7739 /* post card */
7740 atom_asic_init(rdev->mode_info.atom_context);
7741
7742 /* init golden registers */
7743 cik_init_golden_registers(rdev);
7744
7745 rdev->accel_working = true;
7746 r = cik_startup(rdev);
7747 if (r) {
7748 DRM_ERROR("cik startup failed on resume\n");
7749 rdev->accel_working = false;
7750 return r;
7751 }
7752
7753 return r;
7754
7755 }
7756
7757 /**
7758 * cik_suspend - suspend the asic
7759 *
7760 * @rdev: radeon_device pointer
7761 *
7762 * Bring the chip into a state suitable for suspend (CIK).
7763 * Called at suspend.
7764 * Returns 0 for success.
7765 */
7766 int cik_suspend(struct radeon_device *rdev)
7767 {
7768 radeon_vm_manager_fini(rdev);
7769 cik_cp_enable(rdev, false);
7770 cik_sdma_enable(rdev, false);
7771 uvd_v1_0_fini(rdev);
7772 radeon_uvd_suspend(rdev);
7773 cik_irq_suspend(rdev);
7774 radeon_wb_disable(rdev);
7775 cik_pcie_gart_disable(rdev);
7776 return 0;
7777 }
7778
7779 /* The plan is to move initialization into this function and to
7780 * use helper functions so that radeon_device_init does little
7781 * more than call ASIC-specific functions. This should also
7782 * allow us to remove a bunch of callback functions, such as
7783 * vram_info.
7784 */
7785 /**
7786 * cik_init - asic specific driver and hw init
7787 *
7788 * @rdev: radeon_device pointer
7789 *
7790 * Setup asic specific driver variables and program the hw
7791 * to a functional state (CIK).
7792 * Called at driver startup.
7793 * Returns 0 for success, errors for failure.
7794 */
7795 int cik_init(struct radeon_device *rdev)
7796 {
7797 struct radeon_ring *ring;
7798 int r;
7799
7800 /* Read BIOS */
7801 if (!radeon_get_bios(rdev)) {
7802 if (ASIC_IS_AVIVO(rdev))
7803 return -EINVAL;
7804 }
7805 /* Must be an ATOMBIOS */
7806 if (!rdev->is_atom_bios) {
7807 dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7808 return -EINVAL;
7809 }
7810 r = radeon_atombios_init(rdev);
7811 if (r)
7812 return r;
7813
7814 /* Post card if necessary */
7815 if (!radeon_card_posted(rdev)) {
7816 if (!rdev->bios) {
7817 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7818 return -EINVAL;
7819 }
7820 DRM_INFO("GPU not posted. posting now...\n");
7821 atom_asic_init(rdev->mode_info.atom_context);
7822 }
7823 /* init golden registers */
7824 cik_init_golden_registers(rdev);
7825 /* Initialize scratch registers */
7826 cik_scratch_init(rdev);
7827 /* Initialize surface registers */
7828 radeon_surface_init(rdev);
7829 /* Initialize clocks */
7830 radeon_get_clock_info(rdev->ddev);
7831
7832 /* Fence driver */
7833 r = radeon_fence_driver_init(rdev);
7834 if (r)
7835 return r;
7836
7837 /* initialize memory controller */
7838 r = cik_mc_init(rdev);
7839 if (r)
7840 return r;
7841 /* Memory manager */
7842 r = radeon_bo_init(rdev);
7843 if (r)
7844 return r;
7845
7846 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7847 ring->ring_obj = NULL;
7848 r600_ring_init(rdev, ring, 1024 * 1024);
7849
7850 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7851 ring->ring_obj = NULL;
7852 r600_ring_init(rdev, ring, 1024 * 1024);
7853 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7854 if (r)
7855 return r;
7856
7857 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7858 ring->ring_obj = NULL;
7859 r600_ring_init(rdev, ring, 1024 * 1024);
7860 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7861 if (r)
7862 return r;
7863
7864 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7865 ring->ring_obj = NULL;
7866 r600_ring_init(rdev, ring, 256 * 1024);
7867
7868 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7869 ring->ring_obj = NULL;
7870 r600_ring_init(rdev, ring, 256 * 1024);
7871
7872 r = radeon_uvd_init(rdev);
7873 if (!r) {
7874 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7875 ring->ring_obj = NULL;
7876 r600_ring_init(rdev, ring, 4096);
7877 }
7878
7879 rdev->ih.ring_obj = NULL;
7880 r600_ih_ring_init(rdev, 64 * 1024);
7881
7882 r = r600_pcie_gart_init(rdev);
7883 if (r)
7884 return r;
7885
7886 rdev->accel_working = true;
7887 r = cik_startup(rdev);
7888 if (r) {
7889 dev_err(rdev->dev, "disabling GPU acceleration\n");
7890 cik_cp_fini(rdev);
7891 cik_sdma_fini(rdev);
7892 cik_irq_fini(rdev);
7893 sumo_rlc_fini(rdev);
7894 cik_mec_fini(rdev);
7895 radeon_wb_fini(rdev);
7896 radeon_ib_pool_fini(rdev);
7897 radeon_vm_manager_fini(rdev);
7898 radeon_irq_kms_fini(rdev);
7899 cik_pcie_gart_fini(rdev);
7900 rdev->accel_working = false;
7901 }
7902
7903 /* Don't start up if the MC ucode is missing.
7904 * The default clocks and voltages before the MC ucode
7905 * is loaded are not sufficient for advanced operations.
7906 */
7907 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7908 DRM_ERROR("radeon: MC ucode required for NI+.\n");
7909 return -EINVAL;
7910 }
7911
7912 return 0;
7913 }
7914
7915 /**
7916 * cik_fini - asic specific driver and hw fini
7917 *
7918 * @rdev: radeon_device pointer
7919 *
7920 * Tear down the asic specific driver variables and program the hw
7921 * to an idle state (CIK).
7922 * Called at driver unload.
7923 */
7924 void cik_fini(struct radeon_device *rdev)
7925 {
7926 cik_cp_fini(rdev);
7927 cik_sdma_fini(rdev);
7928 cik_irq_fini(rdev);
7929 sumo_rlc_fini(rdev);
7930 cik_mec_fini(rdev);
7931 radeon_wb_fini(rdev);
7932 radeon_vm_manager_fini(rdev);
7933 radeon_ib_pool_fini(rdev);
7934 radeon_irq_kms_fini(rdev);
7935 uvd_v1_0_fini(rdev);
7936 radeon_uvd_fini(rdev);
7937 cik_pcie_gart_fini(rdev);
7938 r600_vram_scratch_fini(rdev);
7939 radeon_gem_fini(rdev);
7940 radeon_fence_driver_fini(rdev);
7941 radeon_bo_fini(rdev);
7942 radeon_atombios_fini(rdev);
7943 kfree(rdev->bios);
7944 rdev->bios = NULL;
7945 }
7946
7947 /* display watermark setup */
7948 /**
7949 * dce8_line_buffer_adjust - Set up the line buffer
7950 *
7951 * @rdev: radeon_device pointer
7952 * @radeon_crtc: the selected display controller
7953 * @mode: the current display mode on the selected display
7954 * controller
7955 *
7956 * Set up the line buffer allocation for
7957 * the selected display controller (CIK).
7958 * Returns the line buffer size in pixels.
7959 */
7960 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7961 struct radeon_crtc *radeon_crtc,
7962 struct drm_display_mode *mode)
7963 {
7964 u32 tmp;
7965
7966 /*
7967 * Line Buffer Setup
7968 * There are 6 line buffers, one for each display controller.
7969 * There are 3 partitions per LB. Select the number of partitions
7970 * to enable based on the display width. For display widths larger
7971 * than 4096, you need to use 2 display controllers and combine
7972 * them using the stereo blender.
7973 */
7974 if (radeon_crtc->base.enabled && mode) {
7975 if (mode->crtc_hdisplay < 1920)
7976 tmp = 1;
7977 else if (mode->crtc_hdisplay < 2560)
7978 tmp = 2;
7979 else if (mode->crtc_hdisplay < 4096)
7980 tmp = 0;
7981 else {
7982 DRM_DEBUG_KMS("Mode too big for LB!\n");
7983 tmp = 0;
7984 }
7985 } else
7986 tmp = 1;
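/* The returned sizes below suggest (this is inferred from the code,
 * not from register documentation) that config 0 allocates the full
 * LB (up to 4096 pixels), config 1 the smallest share (up to 1920)
 * and config 2 a larger share (up to 2560); the factor of 2 in the
 * returned sizes presumably reflects two lines of storage.
 */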
7987
7988 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7989 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7990
7991 if (radeon_crtc->base.enabled && mode) {
7992 switch (tmp) {
7993 case 0:
7994 default:
7995 return 4096 * 2;
7996 case 1:
7997 return 1920 * 2;
7998 case 2:
7999 return 2560 * 2;
8000 }
8001 }
8002
8003 /* controller not enabled, so no lb used */
8004 return 0;
8005 }
8006
8007 /**
8008 * cik_get_number_of_dram_channels - get the number of dram channels
8009 *
8010 * @rdev: radeon_device pointer
8011 *
8012 * Look up the number of video ram channels (CIK).
8013 * Used for display watermark bandwidth calculations
8014 * Returns the number of dram channels
8015 */
8016 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8017 {
8018 u32 tmp = RREG32(MC_SHARED_CHMAP);
8019
8020 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8021 case 0:
8022 default:
8023 return 1;
8024 case 1:
8025 return 2;
8026 case 2:
8027 return 4;
8028 case 3:
8029 return 8;
8030 case 4:
8031 return 3;
8032 case 5:
8033 return 6;
8034 case 6:
8035 return 10;
8036 case 7:
8037 return 12;
8038 case 8:
8039 return 16;
8040 }
8041 }
8042
8043 struct dce8_wm_params {
8044 u32 dram_channels; /* number of dram channels */
8045 u32 yclk; /* bandwidth per dram data pin in kHz */
8046 u32 sclk; /* engine clock in kHz */
8047 u32 disp_clk; /* display clock in kHz */
8048 u32 src_width; /* viewport width */
8049 u32 active_time; /* active display time in ns */
8050 u32 blank_time; /* blank time in ns */
8051 bool interlaced; /* mode is interlaced */
8052 fixed20_12 vsc; /* vertical scale ratio */
8053 u32 num_heads; /* number of active crtcs */
8054 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8055 u32 lb_size; /* line buffer allocated to pipe */
8056 u32 vtaps; /* vertical scaler taps */
8057 };
8058
8059 /**
8060 * dce8_dram_bandwidth - get the dram bandwidth
8061 *
8062 * @wm: watermark calculation data
8063 *
8064 * Calculate the raw dram bandwidth (CIK).
8065 * Used for display watermark bandwidth calculations
8066 * Returns the dram bandwidth in MBytes/s
8067 */
8068 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8069 {
8070 /* Calculate raw DRAM Bandwidth */
8071 fixed20_12 dram_efficiency; /* 0.7 */
8072 fixed20_12 yclk, dram_channels, bandwidth;
8073 fixed20_12 a;
8074
8075 a.full = dfixed_const(1000);
8076 yclk.full = dfixed_const(wm->yclk);
8077 yclk.full = dfixed_div(yclk, a);
8078 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8079 a.full = dfixed_const(10);
8080 dram_efficiency.full = dfixed_const(7);
8081 dram_efficiency.full = dfixed_div(dram_efficiency, a);
8082 bandwidth.full = dfixed_mul(dram_channels, yclk);
8083 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8084
8085 return dfixed_trunc(bandwidth);
8086 }
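/* In plain arithmetic the above works out to roughly (a sketch of
 * the fixed-point math, with yclk in kHz as documented in
 * dce8_wm_params):
 *
 *   bandwidth [MB/s] = (yclk / 1000) * (dram_channels * 4 bytes) * 0.7
 *
 * i.e. per-pin data rate in MHz times 4 bytes per channel, derated
 * by the 0.7 DRAM efficiency factor.
 */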
8087
8088 /**
8089 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8090 *
8091 * @wm: watermark calculation data
8092 *
8093 * Calculate the dram bandwidth used for display (CIK).
8094 * Used for display watermark bandwidth calculations
8095 * Returns the dram bandwidth for display in MBytes/s
8096 */
8097 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8098 {
8099 /* Calculate DRAM Bandwidth and the part allocated to display. */
8100 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8101 fixed20_12 yclk, dram_channels, bandwidth;
8102 fixed20_12 a;
8103
8104 a.full = dfixed_const(1000);
8105 yclk.full = dfixed_const(wm->yclk);
8106 yclk.full = dfixed_div(yclk, a);
8107 dram_channels.full = dfixed_const(wm->dram_channels * 4);
8108 a.full = dfixed_const(10);
8109 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
8110 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8111 bandwidth.full = dfixed_mul(dram_channels, yclk);
8112 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8113
8114 return dfixed_trunc(bandwidth);
8115 }
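/* Same computation as dce8_dram_bandwidth(), but derated by the 0.3
 * worst-case display allocation instead of the 0.7 efficiency factor:
 *
 *   bandwidth [MB/s] = (yclk / 1000) * (dram_channels * 4 bytes) * 0.3
 */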
8116
8117 /**
8118 * dce8_data_return_bandwidth - get the data return bandwidth
8119 *
8120 * @wm: watermark calculation data
8121 *
8122 * Calculate the data return bandwidth used for display (CIK).
8123 * Used for display watermark bandwidth calculations
8124 * Returns the data return bandwidth in MBytes/s
8125 */
8126 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8127 {
8128 /* Calculate the display Data return Bandwidth */
8129 fixed20_12 return_efficiency; /* 0.8 */
8130 fixed20_12 sclk, bandwidth;
8131 fixed20_12 a;
8132
8133 a.full = dfixed_const(1000);
8134 sclk.full = dfixed_const(wm->sclk);
8135 sclk.full = dfixed_div(sclk, a);
8136 a.full = dfixed_const(10);
8137 return_efficiency.full = dfixed_const(8);
8138 return_efficiency.full = dfixed_div(return_efficiency, a);
8139 a.full = dfixed_const(32);
8140 bandwidth.full = dfixed_mul(a, sclk);
8141 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8142
8143 return dfixed_trunc(bandwidth);
8144 }
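/* Sketch of the math above:
 *
 *   bandwidth [MB/s] = (sclk / 1000) * 32 bytes * 0.8
 *
 * i.e. a 32-byte return per engine clock with an assumed 80%
 * efficiency.
 */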
8145
8146 /**
8147 * dce8_dmif_request_bandwidth - get the dmif bandwidth
8148 *
8149 * @wm: watermark calculation data
8150 *
8151 * Calculate the dmif bandwidth used for display (CIK).
8152 * Used for display watermark bandwidth calculations
8153 * Returns the dmif bandwidth in MBytes/s
8154 */
8155 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8156 {
8157 /* Calculate the DMIF Request Bandwidth */
8158 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8159 fixed20_12 disp_clk, bandwidth;
8160 fixed20_12 a, b;
8161
8162 a.full = dfixed_const(1000);
8163 disp_clk.full = dfixed_const(wm->disp_clk);
8164 disp_clk.full = dfixed_div(disp_clk, a);
8165 a.full = dfixed_const(32);
8166 b.full = dfixed_mul(a, disp_clk);
8167
8168 a.full = dfixed_const(10);
8169 disp_clk_request_efficiency.full = dfixed_const(8);
8170 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8171
8172 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8173
8174 return dfixed_trunc(bandwidth);
8175 }
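/* Analogous sketch: bandwidth [MB/s] = (disp_clk / 1000) * 32 bytes * 0.8,
 * a 32-byte request per display clock at an assumed 80% efficiency.
 */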
8176
8177 /**
8178 * dce8_available_bandwidth - get the min available bandwidth
8179 *
8180 * @wm: watermark calculation data
8181 *
8182 * Calculate the min available bandwidth used for display (CIK).
8183 * Used for display watermark bandwidth calculations
8184 * Returns the min available bandwidth in MBytes/s
8185 */
8186 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8187 {
8188 /* Calculate the available bandwidth. The display can use this temporarily, but not on average. */
8189 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8190 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8191 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8192
8193 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8194 }
8195
8196 /**
8197 * dce8_average_bandwidth - get the average available bandwidth
8198 *
8199 * @wm: watermark calculation data
8200 *
8201 * Calculate the average available bandwidth used for display (CIK).
8202 * Used for display watermark bandwidth calculations
8203 * Returns the average available bandwidth in MBytes/s
8204 */
8205 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8206 {
8207 /* Calculate the display mode Average Bandwidth
8208 * DisplayMode should contain the source and destination dimensions,
8209 * timing, etc.
8210 */
8211 fixed20_12 bpp;
8212 fixed20_12 line_time;
8213 fixed20_12 src_width;
8214 fixed20_12 bandwidth;
8215 fixed20_12 a;
8216
8217 a.full = dfixed_const(1000);
8218 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8219 line_time.full = dfixed_div(line_time, a);
8220 bpp.full = dfixed_const(wm->bytes_per_pixel);
8221 src_width.full = dfixed_const(wm->src_width);
8222 bandwidth.full = dfixed_mul(src_width, bpp);
8223 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8224 bandwidth.full = dfixed_div(bandwidth, line_time);
8225
8226 return dfixed_trunc(bandwidth);
8227 }
8228
8229 /**
8230 * dce8_latency_watermark - get the latency watermark
8231 *
8232 * @wm: watermark calculation data
8233 *
8234 * Calculate the latency watermark (CIK).
8235 * Used for display watermark bandwidth calculations
8236 * Returns the latency watermark in ns
8237 */
8238 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8239 {
8240 /* First calculate the latency in ns */
8241 u32 mc_latency = 2000; /* 2000 ns. */
8242 u32 available_bandwidth = dce8_available_bandwidth(wm);
8243 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8244 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8245 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8246 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8247 (wm->num_heads * cursor_line_pair_return_time);
8248 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8249 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8250 u32 tmp, dmif_size = 12288;
8251 fixed20_12 a, b, c;
8252
8253 if (wm->num_heads == 0)
8254 return 0;
8255
8256 a.full = dfixed_const(2);
8257 b.full = dfixed_const(1);
8258 if ((wm->vsc.full > a.full) ||
8259 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8260 (wm->vtaps >= 5) ||
8261 ((wm->vsc.full >= a.full) && wm->interlaced))
8262 max_src_lines_per_dst_line = 4;
8263 else
8264 max_src_lines_per_dst_line = 2;
8265
8266 a.full = dfixed_const(available_bandwidth);
8267 b.full = dfixed_const(wm->num_heads);
8268 a.full = dfixed_div(a, b);
8269
8270 b.full = dfixed_const(mc_latency + 512);
8271 c.full = dfixed_const(wm->disp_clk);
8272 b.full = dfixed_div(b, c);
8273
8274 c.full = dfixed_const(dmif_size);
8275 b.full = dfixed_div(c, b);
8276
8277 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8278
8279 b.full = dfixed_const(1000);
8280 c.full = dfixed_const(wm->disp_clk);
8281 b.full = dfixed_div(c, b);
8282 c.full = dfixed_const(wm->bytes_per_pixel);
8283 b.full = dfixed_mul(b, c);
8284
8285 lb_fill_bw = min(tmp, dfixed_trunc(b));
8286
8287 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8288 b.full = dfixed_const(1000);
8289 c.full = dfixed_const(lb_fill_bw);
8290 b.full = dfixed_div(c, b);
8291 a.full = dfixed_div(a, b);
8292 line_fill_time = dfixed_trunc(a);
8293
8294 if (line_fill_time < wm->active_time)
8295 return latency;
8296 else
8297 return latency + (line_fill_time - wm->active_time);
8298
8299 }
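/* Summarizing the math above: the watermark is
 *
 *   mc_latency + other_heads_data_return_time + dc_latency
 *
 * plus, when the line buffer cannot be refilled within one active
 * display period, the shortfall (line_fill_time - active_time).
 */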
8300
8301 /**
8302 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8303 * average and available dram bandwidth
8304 *
8305 * @wm: watermark calculation data
8306 *
8307 * Check if the display average bandwidth fits in the display
8308 * dram bandwidth (CIK).
8309 * Used for display watermark bandwidth calculations
8310 * Returns true if the display fits, false if not.
8311 */
8312 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8313 {
8314 if (dce8_average_bandwidth(wm) <=
8315 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8316 return true;
8317 else
8318 return false;
8319 }
8320
8321 /**
8322 * dce8_average_bandwidth_vs_available_bandwidth - check
8323 * average and available bandwidth
8324 *
8325 * @wm: watermark calculation data
8326 *
8327 * Check if the display average bandwidth fits in the display
8328 * available bandwidth (CIK).
8329 * Used for display watermark bandwidth calculations
8330 * Returns true if the display fits, false if not.
8331 */
8332 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8333 {
8334 if (dce8_average_bandwidth(wm) <=
8335 (dce8_available_bandwidth(wm) / wm->num_heads))
8336 return true;
8337 else
8338 return false;
8339 }
8340
8341 /**
8342 * dce8_check_latency_hiding - check latency hiding
8343 *
8344 * @wm: watermark calculation data
8345 *
8346 * Check latency hiding (CIK).
8347 * Used for display watermark bandwidth calculations
8348 * Returns true if the display fits, false if not.
8349 */
8350 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8351 {
8352 u32 lb_partitions = wm->lb_size / wm->src_width;
8353 u32 line_time = wm->active_time + wm->blank_time;
8354 u32 latency_tolerant_lines;
8355 u32 latency_hiding;
8356 fixed20_12 a;
8357
8358 a.full = dfixed_const(1);
8359 if (wm->vsc.full > a.full)
8360 latency_tolerant_lines = 1;
8361 else {
8362 if (lb_partitions <= (wm->vtaps + 1))
8363 latency_tolerant_lines = 1;
8364 else
8365 latency_tolerant_lines = 2;
8366 }
8367
8368 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8369
8370 if (dce8_latency_watermark(wm) <= latency_hiding)
8371 return true;
8372 else
8373 return false;
8374 }
8375
8376 /**
8377 * dce8_program_watermarks - program display watermarks
8378 *
8379 * @rdev: radeon_device pointer
8380 * @radeon_crtc: the selected display controller
8381 * @lb_size: line buffer size
8382 * @num_heads: number of display controllers in use
8383 *
8384 * Calculate and program the display watermarks for the
8385 * selected display controller (CIK).
8386 */
8387 static void dce8_program_watermarks(struct radeon_device *rdev,
8388 struct radeon_crtc *radeon_crtc,
8389 u32 lb_size, u32 num_heads)
8390 {
8391 struct drm_display_mode *mode = &radeon_crtc->base.mode;
8392 struct dce8_wm_params wm_low, wm_high;
8393 u32 pixel_period;
8394 u32 line_time = 0;
8395 u32 latency_watermark_a = 0, latency_watermark_b = 0;
8396 u32 tmp, wm_mask;
8397
8398 if (radeon_crtc->base.enabled && num_heads && mode) {
8399 pixel_period = 1000000 / (u32)mode->clock;
8400 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8401
8402 /* watermark for high clocks */
8403 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8404 rdev->pm.dpm_enabled) {
8405 wm_high.yclk =
8406 radeon_dpm_get_mclk(rdev, false) * 10;
8407 wm_high.sclk =
8408 radeon_dpm_get_sclk(rdev, false) * 10;
8409 } else {
8410 wm_high.yclk = rdev->pm.current_mclk * 10;
8411 wm_high.sclk = rdev->pm.current_sclk * 10;
8412 }
8413
8414 wm_high.disp_clk = mode->clock;
8415 wm_high.src_width = mode->crtc_hdisplay;
8416 wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8417 wm_high.blank_time = line_time - wm_high.active_time;
8418 wm_high.interlaced = false;
8419 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8420 wm_high.interlaced = true;
8421 wm_high.vsc = radeon_crtc->vsc;
8422 wm_high.vtaps = 1;
8423 if (radeon_crtc->rmx_type != RMX_OFF)
8424 wm_high.vtaps = 2;
8425 wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8426 wm_high.lb_size = lb_size;
8427 wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8428 wm_high.num_heads = num_heads;
8429
8430 /* set for high clocks */
8431 latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8432
8433 /* possibly force display priority to high */
8434 /* should really do this at mode validation time... */
8435 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8436 !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8437 !dce8_check_latency_hiding(&wm_high) ||
8438 (rdev->disp_priority == 2)) {
8439 DRM_DEBUG_KMS("force priority to high\n");
8440 }
8441
8442 /* watermark for low clocks */
8443 if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8444 rdev->pm.dpm_enabled) {
8445 wm_low.yclk =
8446 radeon_dpm_get_mclk(rdev, true) * 10;
8447 wm_low.sclk =
8448 radeon_dpm_get_sclk(rdev, true) * 10;
8449 } else {
8450 wm_low.yclk = rdev->pm.current_mclk * 10;
8451 wm_low.sclk = rdev->pm.current_sclk * 10;
8452 }
8453
8454 wm_low.disp_clk = mode->clock;
8455 wm_low.src_width = mode->crtc_hdisplay;
8456 wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8457 wm_low.blank_time = line_time - wm_low.active_time;
8458 wm_low.interlaced = false;
8459 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8460 wm_low.interlaced = true;
8461 wm_low.vsc = radeon_crtc->vsc;
8462 wm_low.vtaps = 1;
8463 if (radeon_crtc->rmx_type != RMX_OFF)
8464 wm_low.vtaps = 2;
8465 wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8466 wm_low.lb_size = lb_size;
8467 wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8468 wm_low.num_heads = num_heads;
8469
8470 /* set for low clocks */
8471 latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8472
8473 /* possibly force display priority to high */
8474 /* should really do this at mode validation time... */
8475 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8476 !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8477 !dce8_check_latency_hiding(&wm_low) ||
8478 (rdev->disp_priority == 2)) {
8479 DRM_DEBUG_KMS("force priority to high\n");
8480 }
8481 }
8482
8483 /* select wm A */
8484 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8485 tmp = wm_mask;
8486 tmp &= ~LATENCY_WATERMARK_MASK(3);
8487 tmp |= LATENCY_WATERMARK_MASK(1);
8488 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8489 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8490 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8491 LATENCY_HIGH_WATERMARK(line_time)));
8492 /* select wm B */
8493 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8494 tmp &= ~LATENCY_WATERMARK_MASK(3);
8495 tmp |= LATENCY_WATERMARK_MASK(2);
8496 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8497 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8498 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8499 LATENCY_HIGH_WATERMARK(line_time)));
8500 /* restore original selection */
8501 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8502
8503 /* save values for DPM */
8504 radeon_crtc->line_time = line_time;
8505 radeon_crtc->wm_high = latency_watermark_a;
8506 radeon_crtc->wm_low = latency_watermark_b;
8507 }
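/* Watermark A now holds the high-clock value and watermark B the
 * low-clock value; the saved wm_high/wm_low presumably let the DPM
 * code flip LATENCY_WATERMARK_MASK between them when it reclocks.
 */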
8508
8509 /**
8510 * dce8_bandwidth_update - program display watermarks
8511 *
8512 * @rdev: radeon_device pointer
8513 *
8514 * Calculate and program the display watermarks and line
8515 * buffer allocation (CIK).
8516 */
8517 void dce8_bandwidth_update(struct radeon_device *rdev)
8518 {
8519 struct drm_display_mode *mode = NULL;
8520 u32 num_heads = 0, lb_size;
8521 int i;
8522
8523 radeon_update_display_priority(rdev);
8524
8525 for (i = 0; i < rdev->num_crtc; i++) {
8526 if (rdev->mode_info.crtcs[i]->base.enabled)
8527 num_heads++;
8528 }
8529 for (i = 0; i < rdev->num_crtc; i++) {
8530 mode = &rdev->mode_info.crtcs[i]->base.mode;
8531 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8532 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8533 }
8534 }
8535
8536 /**
8537 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8538 *
8539 * @rdev: radeon_device pointer
8540 *
8541 * Fetches a GPU clock counter snapshot (CIK).
8542 * Returns the 64 bit clock counter snapshot.
8543 */
8544 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8545 {
8546 uint64_t clock;
8547
8548 mutex_lock(&rdev->gpu_clock_mutex);
8549 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8550 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8551 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8552 mutex_unlock(&rdev->gpu_clock_mutex);
8553 return clock;
8554 }
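/* Writing RLC_CAPTURE_GPU_CLOCK_COUNT latches the 64-bit counter so
 * that the LSB/MSB halves read back coherently; the mutex keeps
 * concurrent callers from racing a capture between the two reads.
 */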
8555
8556 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8557 u32 cntl_reg, u32 status_reg)
8558 {
8559 int r, i;
8560 struct atom_clock_dividers dividers;
8561 uint32_t tmp;
8562
8563 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8564 clock, false, &dividers);
8565 if (r)
8566 return r;
8567
8568 tmp = RREG32_SMC(cntl_reg);
8569 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8570 tmp |= dividers.post_divider;
8571 WREG32_SMC(cntl_reg, tmp);
8572
8573 for (i = 0; i < 100; i++) {
8574 if (RREG32_SMC(status_reg) & DCLK_STATUS)
8575 break;
8576 mdelay(10);
8577 }
8578 if (i == 100)
8579 return -ETIMEDOUT;
8580
8581 return 0;
8582 }
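/* The status poll above gives the divider change up to ~1 second
 * (100 iterations x 10 ms) to take effect before reporting
 * -ETIMEDOUT.
 */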
8583
8584 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8585 {
8586 int r = 0;
8587
8588 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8589 if (r)
8590 return r;
8591
8592 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8593 return r;
8594 }
8595
8596 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8597 {
8598 struct pci_dev *root = rdev->pdev->bus->self;
8599 int bridge_pos, gpu_pos;
8600 u32 speed_cntl, mask, current_data_rate;
8601 int ret, i;
8602 u16 tmp16;
8603
8604 if (radeon_pcie_gen2 == 0)
8605 return;
8606
8607 if (rdev->flags & RADEON_IS_IGP)
8608 return;
8609
8610 if (!(rdev->flags & RADEON_IS_PCIE))
8611 return;
8612
8613 ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8614 if (ret != 0)
8615 return;
8616
8617 if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8618 return;
8619
8620 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8621 current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8622 LC_CURRENT_DATA_RATE_SHIFT;
8623 if (mask & DRM_PCIE_SPEED_80) {
8624 if (current_data_rate == 2) {
8625 DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8626 return;
8627 }
8628 DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8629 } else if (mask & DRM_PCIE_SPEED_50) {
8630 if (current_data_rate == 1) {
8631 DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8632 return;
8633 }
8634 DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8635 }
8636
8637 bridge_pos = pci_pcie_cap(root);
8638 if (!bridge_pos)
8639 return;
8640
8641 gpu_pos = pci_pcie_cap(rdev->pdev);
8642 if (!gpu_pos)
8643 return;
8644
8645 if (mask & DRM_PCIE_SPEED_80) {
8646 /* re-try equalization if gen3 is not already enabled */
8647 if (current_data_rate != 2) {
8648 u16 bridge_cfg, gpu_cfg;
8649 u16 bridge_cfg2, gpu_cfg2;
8650 u32 max_lw, current_lw, tmp;
8651
8652 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8653 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8654
8655 tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8656 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8657
8658 tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8659 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8660
8661 tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8662 max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8663 current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8664
8665 if (current_lw < max_lw) {
8666 tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8667 if (tmp & LC_RENEGOTIATION_SUPPORT) {
8668 tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8669 tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8670 tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8671 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8672 }
8673 }
8674
8675 for (i = 0; i < 10; i++) {
8676 /* check status */
8677 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8678 if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8679 break;
8680
8681 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8682 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8683
8684 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8685 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8686
8687 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8688 tmp |= LC_SET_QUIESCE;
8689 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8690
8691 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8692 tmp |= LC_REDO_EQ;
8693 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8694
8695 mdelay(100);
8696
8697 /* linkctl */
8698 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8699 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8700 tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8701 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8702
8703 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8704 tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8705 tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8706 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8707
8708 /* linkctl2 */
8709 pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8710 tmp16 &= ~((1 << 4) | (7 << 9));
8711 tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8712 pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8713
8714 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8715 tmp16 &= ~((1 << 4) | (7 << 9));
8716 tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8717 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8718
8719 tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8720 tmp &= ~LC_SET_QUIESCE;
8721 WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8722 }
8723 }
8724 }
8725
8726 /* set the link speed */
8727 speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8728 speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8729 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8730
8731 pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8732 tmp16 &= ~0xf;
8733 if (mask & DRM_PCIE_SPEED_80)
8734 tmp16 |= 3; /* gen3 */
8735 else if (mask & DRM_PCIE_SPEED_50)
8736 tmp16 |= 2; /* gen2 */
8737 else
8738 tmp16 |= 1; /* gen1 */
8739 pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
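/* LNKCTL2 Target Link Speed encoding: 1 = 2.5 GT/s (gen1),
 * 2 = 5.0 GT/s (gen2), 3 = 8.0 GT/s (gen3), matching the values
 * written above.
 */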
8740
8741 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8742 speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8743 WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8744
8745 for (i = 0; i < rdev->usec_timeout; i++) {
8746 speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8747 if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8748 break;
8749 udelay(1);
8750 }
8751 }
8752
8753 static void cik_program_aspm(struct radeon_device *rdev)
8754 {
8755 u32 data, orig;
8756 bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8757 bool disable_clkreq = false;
8758
8759 if (radeon_aspm == 0)
8760 return;
8761
8762 /* XXX double check IGPs */
8763 if (rdev->flags & RADEON_IS_IGP)
8764 return;
8765
8766 if (!(rdev->flags & RADEON_IS_PCIE))
8767 return;
8768
8769 orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8770 data &= ~LC_XMIT_N_FTS_MASK;
8771 data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8772 if (orig != data)
8773 WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8774
8775 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8776 data |= LC_GO_TO_RECOVERY;
8777 if (orig != data)
8778 WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8779
8780 orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8781 data |= P_IGNORE_EDB_ERR;
8782 if (orig != data)
8783 WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8784
8785 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8786 data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8787 data |= LC_PMI_TO_L1_DIS;
8788 if (!disable_l0s)
8789 data |= LC_L0S_INACTIVITY(7);
8790
8791 if (!disable_l1) {
8792 data |= LC_L1_INACTIVITY(7);
8793 data &= ~LC_PMI_TO_L1_DIS;
8794 if (orig != data)
8795 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8796
8797 if (!disable_plloff_in_l1) {
8798 bool clk_req_support;
8799
8800 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8801 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8802 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8803 if (orig != data)
8804 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8805
8806 orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8807 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8808 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8809 if (orig != data)
8810 WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8811
8812 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8813 data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8814 data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8815 if (orig != data)
8816 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8817
8818 orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8819 data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8820 data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8821 if (orig != data)
8822 WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8823
8824 orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8825 data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8826 data |= LC_DYN_LANES_PWR_STATE(3);
8827 if (orig != data)
8828 WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8829
8830 if (!disable_clkreq) {
8831 struct pci_dev *root = rdev->pdev->bus->self;
8832 u32 lnkcap;
8833
8834 clk_req_support = false;
8835 pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8836 if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8837 clk_req_support = true;
8838 } else {
8839 clk_req_support = false;
8840 }
8841
8842 if (clk_req_support) {
8843 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8844 data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8845 if (orig != data)
8846 WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8847
8848 orig = data = RREG32_SMC(THM_CLK_CNTL);
8849 data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8850 data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8851 if (orig != data)
8852 WREG32_SMC(THM_CLK_CNTL, data);
8853
8854 orig = data = RREG32_SMC(MISC_CLK_CTRL);
8855 data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8856 data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8857 if (orig != data)
8858 WREG32_SMC(MISC_CLK_CTRL, data);
8859
8860 orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8861 data &= ~BCLK_AS_XCLK;
8862 if (orig != data)
8863 WREG32_SMC(CG_CLKPIN_CNTL, data);
8864
8865 orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8866 data &= ~FORCE_BIF_REFCLK_EN;
8867 if (orig != data)
8868 WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8869
8870 orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8871 data &= ~MPLL_CLKOUT_SEL_MASK;
8872 data |= MPLL_CLKOUT_SEL(4);
8873 if (orig != data)
8874 WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8875 }
8876 }
8877 } else {
8878 if (orig != data)
8879 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8880 }
8881
8882 orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8883 data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8884 if (orig != data)
8885 WREG32_PCIE_PORT(PCIE_CNTL2, data);
8886
8887 if (!disable_l0s) {
8888 data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8889 if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8890 data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8891 if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8892 orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8893 data &= ~LC_L0S_INACTIVITY_MASK;
8894 if (orig != data)
8895 WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8896 }
8897 }
8898 }
8899 }