1 /*
2 * Copyright 2012 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33
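/* ucode image sizes below are in dwords; cik_init_microcode() multiplies them
 * by 4 to get the expected size in bytes of each firmware .bin file.
 */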
34 /* GFX */
35 #define CIK_PFP_UCODE_SIZE 2144
36 #define CIK_ME_UCODE_SIZE 2144
37 #define CIK_CE_UCODE_SIZE 2144
38 /* compute */
39 #define CIK_MEC_UCODE_SIZE 4192
40 /* interrupts */
41 #define BONAIRE_RLC_UCODE_SIZE 2048
42 #define KB_RLC_UCODE_SIZE 2560
43 #define KV_RLC_UCODE_SIZE 2560
44 /* gddr controller */
45 #define CIK_MC_UCODE_SIZE 7866
46 /* sdma */
47 #define CIK_SDMA_UCODE_SIZE 1050
48 #define CIK_SDMA_UCODE_VERSION 64
49
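/* MODULE_FIRMWARE() records each ucode image in the module metadata so that
 * userspace tooling can see which firmware files this driver may request.
 */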
50 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
51 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
63 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
64 MODULE_FIRMWARE("radeon/KABINI_me.bin");
65 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
66 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
67 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
68 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
69
70 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
71 extern void r600_ih_ring_fini(struct radeon_device *rdev);
72 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
73 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
75 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
76 extern void si_rlc_fini(struct radeon_device *rdev);
77 extern int si_rlc_init(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79
80 /*
81 * Indirect registers accessor
82 */
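/* The PCIE registers are reached indirectly: the register offset is written to
 * PCIE_INDEX and the value is then accessed through PCIE_DATA.  The extra reads
 * of PCIE_INDEX/PCIE_DATA are presumably there to post the writes before the
 * next access.
 */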
83 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
84 {
85 u32 r;
86
87 WREG32(PCIE_INDEX, reg);
88 (void)RREG32(PCIE_INDEX);
89 r = RREG32(PCIE_DATA);
90 return r;
91 }
92
93 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
94 {
95 WREG32(PCIE_INDEX, reg);
96 (void)RREG32(PCIE_INDEX);
97 WREG32(PCIE_DATA, v);
98 (void)RREG32(PCIE_DATA);
99 }
100
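/* "golden" register tables: each entry is expected to be an {offset, and-mask,
 * or-value} triple, consumed three at a time by radeon_program_register_sequence()
 * from cik_init_golden_registers() below.
 */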
101 static const u32 bonaire_golden_spm_registers[] =
102 {
103 0x30800, 0xe0ffffff, 0xe0000000
104 };
105
106 static const u32 bonaire_golden_common_registers[] =
107 {
108 0xc770, 0xffffffff, 0x00000800,
109 0xc774, 0xffffffff, 0x00000800,
110 0xc798, 0xffffffff, 0x00007fbf,
111 0xc79c, 0xffffffff, 0x00007faf
112 };
113
114 static const u32 bonaire_golden_registers[] =
115 {
116 0x3354, 0x00000333, 0x00000333,
117 0x3350, 0x000c0fc0, 0x00040200,
118 0x9a10, 0x00010000, 0x00058208,
119 0x3c000, 0xffff1fff, 0x00140000,
120 0x3c200, 0xfdfc0fff, 0x00000100,
121 0x3c234, 0x40000000, 0x40000200,
122 0x9830, 0xffffffff, 0x00000000,
123 0x9834, 0xf00fffff, 0x00000400,
124 0x9838, 0x0002021c, 0x00020200,
125 0xc78, 0x00000080, 0x00000000,
126 0x5bb0, 0x000000f0, 0x00000070,
127 0x5bc0, 0xf0311fff, 0x80300000,
128 0x98f8, 0x73773777, 0x12010001,
129 0x350c, 0x00810000, 0x408af000,
130 0x7030, 0x31000111, 0x00000011,
131 0x2f48, 0x73773777, 0x12010001,
132 0x220c, 0x00007fb6, 0x0021a1b1,
133 0x2210, 0x00007fb6, 0x002021b1,
134 0x2180, 0x00007fb6, 0x00002191,
135 0x2218, 0x00007fb6, 0x002121b1,
136 0x221c, 0x00007fb6, 0x002021b1,
137 0x21dc, 0x00007fb6, 0x00002191,
138 0x21e0, 0x00007fb6, 0x00002191,
139 0x3628, 0x0000003f, 0x0000000a,
140 0x362c, 0x0000003f, 0x0000000a,
141 0x2ae4, 0x00073ffe, 0x000022a2,
142 0x240c, 0x000007ff, 0x00000000,
143 0x8a14, 0xf000003f, 0x00000007,
144 0x8bf0, 0x00002001, 0x00000001,
145 0x8b24, 0xffffffff, 0x00ffffff,
146 0x30a04, 0x0000ff0f, 0x00000000,
147 0x28a4c, 0x07ffffff, 0x06000000,
148 0x4d8, 0x00000fff, 0x00000100,
149 0x3e78, 0x00000001, 0x00000002,
150 0x9100, 0x03000000, 0x0362c688,
151 0x8c00, 0x000000ff, 0x00000001,
152 0xe40, 0x00001fff, 0x00001fff,
153 0x9060, 0x0000007f, 0x00000020,
154 0x9508, 0x00010000, 0x00010000,
155 0xac14, 0x000003ff, 0x000000f3,
156 0xac0c, 0xffffffff, 0x00001032
157 };
158
159 static const u32 bonaire_mgcg_cgcg_init[] =
160 {
161 0xc420, 0xffffffff, 0xfffffffc,
162 0x30800, 0xffffffff, 0xe0000000,
163 0x3c2a0, 0xffffffff, 0x00000100,
164 0x3c208, 0xffffffff, 0x00000100,
165 0x3c2c0, 0xffffffff, 0xc0000100,
166 0x3c2c8, 0xffffffff, 0xc0000100,
167 0x3c2c4, 0xffffffff, 0xc0000100,
168 0x55e4, 0xffffffff, 0x00600100,
169 0x3c280, 0xffffffff, 0x00000100,
170 0x3c214, 0xffffffff, 0x06000100,
171 0x3c220, 0xffffffff, 0x00000100,
172 0x3c218, 0xffffffff, 0x06000100,
173 0x3c204, 0xffffffff, 0x00000100,
174 0x3c2e0, 0xffffffff, 0x00000100,
175 0x3c224, 0xffffffff, 0x00000100,
176 0x3c200, 0xffffffff, 0x00000100,
177 0x3c230, 0xffffffff, 0x00000100,
178 0x3c234, 0xffffffff, 0x00000100,
179 0x3c250, 0xffffffff, 0x00000100,
180 0x3c254, 0xffffffff, 0x00000100,
181 0x3c258, 0xffffffff, 0x00000100,
182 0x3c25c, 0xffffffff, 0x00000100,
183 0x3c260, 0xffffffff, 0x00000100,
184 0x3c27c, 0xffffffff, 0x00000100,
185 0x3c278, 0xffffffff, 0x00000100,
186 0x3c210, 0xffffffff, 0x06000100,
187 0x3c290, 0xffffffff, 0x00000100,
188 0x3c274, 0xffffffff, 0x00000100,
189 0x3c2b4, 0xffffffff, 0x00000100,
190 0x3c2b0, 0xffffffff, 0x00000100,
191 0x3c270, 0xffffffff, 0x00000100,
192 0x30800, 0xffffffff, 0xe0000000,
193 0x3c020, 0xffffffff, 0x00010000,
194 0x3c024, 0xffffffff, 0x00030002,
195 0x3c028, 0xffffffff, 0x00040007,
196 0x3c02c, 0xffffffff, 0x00060005,
197 0x3c030, 0xffffffff, 0x00090008,
198 0x3c034, 0xffffffff, 0x00010000,
199 0x3c038, 0xffffffff, 0x00030002,
200 0x3c03c, 0xffffffff, 0x00040007,
201 0x3c040, 0xffffffff, 0x00060005,
202 0x3c044, 0xffffffff, 0x00090008,
203 0x3c048, 0xffffffff, 0x00010000,
204 0x3c04c, 0xffffffff, 0x00030002,
205 0x3c050, 0xffffffff, 0x00040007,
206 0x3c054, 0xffffffff, 0x00060005,
207 0x3c058, 0xffffffff, 0x00090008,
208 0x3c05c, 0xffffffff, 0x00010000,
209 0x3c060, 0xffffffff, 0x00030002,
210 0x3c064, 0xffffffff, 0x00040007,
211 0x3c068, 0xffffffff, 0x00060005,
212 0x3c06c, 0xffffffff, 0x00090008,
213 0x3c070, 0xffffffff, 0x00010000,
214 0x3c074, 0xffffffff, 0x00030002,
215 0x3c078, 0xffffffff, 0x00040007,
216 0x3c07c, 0xffffffff, 0x00060005,
217 0x3c080, 0xffffffff, 0x00090008,
218 0x3c084, 0xffffffff, 0x00010000,
219 0x3c088, 0xffffffff, 0x00030002,
220 0x3c08c, 0xffffffff, 0x00040007,
221 0x3c090, 0xffffffff, 0x00060005,
222 0x3c094, 0xffffffff, 0x00090008,
223 0x3c098, 0xffffffff, 0x00010000,
224 0x3c09c, 0xffffffff, 0x00030002,
225 0x3c0a0, 0xffffffff, 0x00040007,
226 0x3c0a4, 0xffffffff, 0x00060005,
227 0x3c0a8, 0xffffffff, 0x00090008,
228 0x3c000, 0xffffffff, 0x96e00200,
229 0x8708, 0xffffffff, 0x00900100,
230 0xc424, 0xffffffff, 0x0020003f,
231 0x38, 0xffffffff, 0x0140001c,
232 0x3c, 0x000f0000, 0x000f0000,
233 0x220, 0xffffffff, 0xC060000C,
234 0x224, 0xc0000fff, 0x00000100,
235 0xf90, 0xffffffff, 0x00000100,
236 0xf98, 0x00000101, 0x00000000,
237 0x20a8, 0xffffffff, 0x00000104,
238 0x55e4, 0xff000fff, 0x00000100,
239 0x30cc, 0xc0000fff, 0x00000104,
240 0xc1e4, 0x00000001, 0x00000001,
241 0xd00c, 0xff000ff0, 0x00000100,
242 0xd80c, 0xff000ff0, 0x00000100
243 };
244
245 static const u32 spectre_golden_spm_registers[] =
246 {
247 0x30800, 0xe0ffffff, 0xe0000000
248 };
249
250 static const u32 spectre_golden_common_registers[] =
251 {
252 0xc770, 0xffffffff, 0x00000800,
253 0xc774, 0xffffffff, 0x00000800,
254 0xc798, 0xffffffff, 0x00007fbf,
255 0xc79c, 0xffffffff, 0x00007faf
256 };
257
258 static const u32 spectre_golden_registers[] =
259 {
260 0x3c000, 0xffff1fff, 0x96940200,
261 0x3c00c, 0xffff0001, 0xff000000,
262 0x3c200, 0xfffc0fff, 0x00000100,
263 0x6ed8, 0x00010101, 0x00010000,
264 0x9834, 0xf00fffff, 0x00000400,
265 0x9838, 0xfffffffc, 0x00020200,
266 0x5bb0, 0x000000f0, 0x00000070,
267 0x5bc0, 0xf0311fff, 0x80300000,
268 0x98f8, 0x73773777, 0x12010001,
269 0x9b7c, 0x00ff0000, 0x00fc0000,
270 0x2f48, 0x73773777, 0x12010001,
271 0x8a14, 0xf000003f, 0x00000007,
272 0x8b24, 0xffffffff, 0x00ffffff,
273 0x28350, 0x3f3f3fff, 0x00000082,
274 0x28355, 0x0000003f, 0x00000000,
275 0x3e78, 0x00000001, 0x00000002,
276 0x913c, 0xffff03df, 0x00000004,
277 0xc768, 0x00000008, 0x00000008,
278 0x8c00, 0x000008ff, 0x00000800,
279 0x9508, 0x00010000, 0x00010000,
280 0xac0c, 0xffffffff, 0x54763210,
281 0x214f8, 0x01ff01ff, 0x00000002,
282 0x21498, 0x007ff800, 0x00200000,
283 0x2015c, 0xffffffff, 0x00000f40,
284 0x30934, 0xffffffff, 0x00000001
285 };
286
287 static const u32 spectre_mgcg_cgcg_init[] =
288 {
289 0xc420, 0xffffffff, 0xfffffffc,
290 0x30800, 0xffffffff, 0xe0000000,
291 0x3c2a0, 0xffffffff, 0x00000100,
292 0x3c208, 0xffffffff, 0x00000100,
293 0x3c2c0, 0xffffffff, 0x00000100,
294 0x3c2c8, 0xffffffff, 0x00000100,
295 0x3c2c4, 0xffffffff, 0x00000100,
296 0x55e4, 0xffffffff, 0x00600100,
297 0x3c280, 0xffffffff, 0x00000100,
298 0x3c214, 0xffffffff, 0x06000100,
299 0x3c220, 0xffffffff, 0x00000100,
300 0x3c218, 0xffffffff, 0x06000100,
301 0x3c204, 0xffffffff, 0x00000100,
302 0x3c2e0, 0xffffffff, 0x00000100,
303 0x3c224, 0xffffffff, 0x00000100,
304 0x3c200, 0xffffffff, 0x00000100,
305 0x3c230, 0xffffffff, 0x00000100,
306 0x3c234, 0xffffffff, 0x00000100,
307 0x3c250, 0xffffffff, 0x00000100,
308 0x3c254, 0xffffffff, 0x00000100,
309 0x3c258, 0xffffffff, 0x00000100,
310 0x3c25c, 0xffffffff, 0x00000100,
311 0x3c260, 0xffffffff, 0x00000100,
312 0x3c27c, 0xffffffff, 0x00000100,
313 0x3c278, 0xffffffff, 0x00000100,
314 0x3c210, 0xffffffff, 0x06000100,
315 0x3c290, 0xffffffff, 0x00000100,
316 0x3c274, 0xffffffff, 0x00000100,
317 0x3c2b4, 0xffffffff, 0x00000100,
318 0x3c2b0, 0xffffffff, 0x00000100,
319 0x3c270, 0xffffffff, 0x00000100,
320 0x30800, 0xffffffff, 0xe0000000,
321 0x3c020, 0xffffffff, 0x00010000,
322 0x3c024, 0xffffffff, 0x00030002,
323 0x3c028, 0xffffffff, 0x00040007,
324 0x3c02c, 0xffffffff, 0x00060005,
325 0x3c030, 0xffffffff, 0x00090008,
326 0x3c034, 0xffffffff, 0x00010000,
327 0x3c038, 0xffffffff, 0x00030002,
328 0x3c03c, 0xffffffff, 0x00040007,
329 0x3c040, 0xffffffff, 0x00060005,
330 0x3c044, 0xffffffff, 0x00090008,
331 0x3c048, 0xffffffff, 0x00010000,
332 0x3c04c, 0xffffffff, 0x00030002,
333 0x3c050, 0xffffffff, 0x00040007,
334 0x3c054, 0xffffffff, 0x00060005,
335 0x3c058, 0xffffffff, 0x00090008,
336 0x3c05c, 0xffffffff, 0x00010000,
337 0x3c060, 0xffffffff, 0x00030002,
338 0x3c064, 0xffffffff, 0x00040007,
339 0x3c068, 0xffffffff, 0x00060005,
340 0x3c06c, 0xffffffff, 0x00090008,
341 0x3c070, 0xffffffff, 0x00010000,
342 0x3c074, 0xffffffff, 0x00030002,
343 0x3c078, 0xffffffff, 0x00040007,
344 0x3c07c, 0xffffffff, 0x00060005,
345 0x3c080, 0xffffffff, 0x00090008,
346 0x3c084, 0xffffffff, 0x00010000,
347 0x3c088, 0xffffffff, 0x00030002,
348 0x3c08c, 0xffffffff, 0x00040007,
349 0x3c090, 0xffffffff, 0x00060005,
350 0x3c094, 0xffffffff, 0x00090008,
351 0x3c098, 0xffffffff, 0x00010000,
352 0x3c09c, 0xffffffff, 0x00030002,
353 0x3c0a0, 0xffffffff, 0x00040007,
354 0x3c0a4, 0xffffffff, 0x00060005,
355 0x3c0a8, 0xffffffff, 0x00090008,
356 0x3c0ac, 0xffffffff, 0x00010000,
357 0x3c0b0, 0xffffffff, 0x00030002,
358 0x3c0b4, 0xffffffff, 0x00040007,
359 0x3c0b8, 0xffffffff, 0x00060005,
360 0x3c0bc, 0xffffffff, 0x00090008,
361 0x3c000, 0xffffffff, 0x96e00200,
362 0x8708, 0xffffffff, 0x00900100,
363 0xc424, 0xffffffff, 0x0020003f,
364 0x38, 0xffffffff, 0x0140001c,
365 0x3c, 0x000f0000, 0x000f0000,
366 0x220, 0xffffffff, 0xC060000C,
367 0x224, 0xc0000fff, 0x00000100,
368 0xf90, 0xffffffff, 0x00000100,
369 0xf98, 0x00000101, 0x00000000,
370 0x20a8, 0xffffffff, 0x00000104,
371 0x55e4, 0xff000fff, 0x00000100,
372 0x30cc, 0xc0000fff, 0x00000104,
373 0xc1e4, 0x00000001, 0x00000001,
374 0xd00c, 0xff000ff0, 0x00000100,
375 0xd80c, 0xff000ff0, 0x00000100
376 };
377
378 static const u32 kalindi_golden_spm_registers[] =
379 {
380 0x30800, 0xe0ffffff, 0xe0000000
381 };
382
383 static const u32 kalindi_golden_common_registers[] =
384 {
385 0xc770, 0xffffffff, 0x00000800,
386 0xc774, 0xffffffff, 0x00000800,
387 0xc798, 0xffffffff, 0x00007fbf,
388 0xc79c, 0xffffffff, 0x00007faf
389 };
390
391 static const u32 kalindi_golden_registers[] =
392 {
393 0x3c000, 0xffffdfff, 0x6e944040,
394 0x55e4, 0xff607fff, 0xfc000100,
395 0x3c220, 0xff000fff, 0x00000100,
396 0x3c224, 0xff000fff, 0x00000100,
397 0x3c200, 0xfffc0fff, 0x00000100,
398 0x6ed8, 0x00010101, 0x00010000,
399 0x9830, 0xffffffff, 0x00000000,
400 0x9834, 0xf00fffff, 0x00000400,
401 0x5bb0, 0x000000f0, 0x00000070,
402 0x5bc0, 0xf0311fff, 0x80300000,
403 0x98f8, 0x73773777, 0x12010001,
404 0x98fc, 0xffffffff, 0x00000010,
405 0x9b7c, 0x00ff0000, 0x00fc0000,
406 0x8030, 0x00001f0f, 0x0000100a,
407 0x2f48, 0x73773777, 0x12010001,
408 0x2408, 0x000fffff, 0x000c007f,
409 0x8a14, 0xf000003f, 0x00000007,
410 0x8b24, 0x3fff3fff, 0x00ffcfff,
411 0x30a04, 0x0000ff0f, 0x00000000,
412 0x28a4c, 0x07ffffff, 0x06000000,
413 0x4d8, 0x00000fff, 0x00000100,
414 0x3e78, 0x00000001, 0x00000002,
415 0xc768, 0x00000008, 0x00000008,
416 0x8c00, 0x000000ff, 0x00000003,
417 0x214f8, 0x01ff01ff, 0x00000002,
418 0x21498, 0x007ff800, 0x00200000,
419 0x2015c, 0xffffffff, 0x00000f40,
420 0x88c4, 0x001f3ae3, 0x00000082,
421 0x88d4, 0x0000001f, 0x00000010,
422 0x30934, 0xffffffff, 0x00000000
423 };
424
425 static const u32 kalindi_mgcg_cgcg_init[] =
426 {
427 0xc420, 0xffffffff, 0xfffffffc,
428 0x30800, 0xffffffff, 0xe0000000,
429 0x3c2a0, 0xffffffff, 0x00000100,
430 0x3c208, 0xffffffff, 0x00000100,
431 0x3c2c0, 0xffffffff, 0x00000100,
432 0x3c2c8, 0xffffffff, 0x00000100,
433 0x3c2c4, 0xffffffff, 0x00000100,
434 0x55e4, 0xffffffff, 0x00600100,
435 0x3c280, 0xffffffff, 0x00000100,
436 0x3c214, 0xffffffff, 0x06000100,
437 0x3c220, 0xffffffff, 0x00000100,
438 0x3c218, 0xffffffff, 0x06000100,
439 0x3c204, 0xffffffff, 0x00000100,
440 0x3c2e0, 0xffffffff, 0x00000100,
441 0x3c224, 0xffffffff, 0x00000100,
442 0x3c200, 0xffffffff, 0x00000100,
443 0x3c230, 0xffffffff, 0x00000100,
444 0x3c234, 0xffffffff, 0x00000100,
445 0x3c250, 0xffffffff, 0x00000100,
446 0x3c254, 0xffffffff, 0x00000100,
447 0x3c258, 0xffffffff, 0x00000100,
448 0x3c25c, 0xffffffff, 0x00000100,
449 0x3c260, 0xffffffff, 0x00000100,
450 0x3c27c, 0xffffffff, 0x00000100,
451 0x3c278, 0xffffffff, 0x00000100,
452 0x3c210, 0xffffffff, 0x06000100,
453 0x3c290, 0xffffffff, 0x00000100,
454 0x3c274, 0xffffffff, 0x00000100,
455 0x3c2b4, 0xffffffff, 0x00000100,
456 0x3c2b0, 0xffffffff, 0x00000100,
457 0x3c270, 0xffffffff, 0x00000100,
458 0x30800, 0xffffffff, 0xe0000000,
459 0x3c020, 0xffffffff, 0x00010000,
460 0x3c024, 0xffffffff, 0x00030002,
461 0x3c028, 0xffffffff, 0x00040007,
462 0x3c02c, 0xffffffff, 0x00060005,
463 0x3c030, 0xffffffff, 0x00090008,
464 0x3c034, 0xffffffff, 0x00010000,
465 0x3c038, 0xffffffff, 0x00030002,
466 0x3c03c, 0xffffffff, 0x00040007,
467 0x3c040, 0xffffffff, 0x00060005,
468 0x3c044, 0xffffffff, 0x00090008,
469 0x3c000, 0xffffffff, 0x96e00200,
470 0x8708, 0xffffffff, 0x00900100,
471 0xc424, 0xffffffff, 0x0020003f,
472 0x38, 0xffffffff, 0x0140001c,
473 0x3c, 0x000f0000, 0x000f0000,
474 0x220, 0xffffffff, 0xC060000C,
475 0x224, 0xc0000fff, 0x00000100,
476 0x20a8, 0xffffffff, 0x00000104,
477 0x55e4, 0xff000fff, 0x00000100,
478 0x30cc, 0xc0000fff, 0x00000104,
479 0xc1e4, 0x00000001, 0x00000001,
480 0xd00c, 0xff000ff0, 0x00000100,
481 0xd80c, 0xff000ff0, 0x00000100
482 };
483
484 static void cik_init_golden_registers(struct radeon_device *rdev)
485 {
486 switch (rdev->family) {
487 case CHIP_BONAIRE:
488 radeon_program_register_sequence(rdev,
489 bonaire_mgcg_cgcg_init,
490 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
491 radeon_program_register_sequence(rdev,
492 bonaire_golden_registers,
493 (const u32)ARRAY_SIZE(bonaire_golden_registers));
494 radeon_program_register_sequence(rdev,
495 bonaire_golden_common_registers,
496 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
497 radeon_program_register_sequence(rdev,
498 bonaire_golden_spm_registers,
499 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
500 break;
501 case CHIP_KABINI:
502 radeon_program_register_sequence(rdev,
503 kalindi_mgcg_cgcg_init,
504 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
505 radeon_program_register_sequence(rdev,
506 kalindi_golden_registers,
507 (const u32)ARRAY_SIZE(kalindi_golden_registers));
508 radeon_program_register_sequence(rdev,
509 kalindi_golden_common_registers,
510 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
511 radeon_program_register_sequence(rdev,
512 kalindi_golden_spm_registers,
513 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
514 break;
515 case CHIP_KAVERI:
516 radeon_program_register_sequence(rdev,
517 spectre_mgcg_cgcg_init,
518 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
519 radeon_program_register_sequence(rdev,
520 spectre_golden_registers,
521 (const u32)ARRAY_SIZE(spectre_golden_registers));
522 radeon_program_register_sequence(rdev,
523 spectre_golden_common_registers,
524 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
525 radeon_program_register_sequence(rdev,
526 spectre_golden_spm_registers,
527 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
528 break;
529 default:
530 break;
531 }
532 }
533
534 /**
535 * cik_get_xclk - get the xclk
536 *
537 * @rdev: radeon_device pointer
538 *
539 * Returns the reference clock used by the gfx engine
540 * (CIK).
541 */
542 u32 cik_get_xclk(struct radeon_device *rdev)
543 {
544 u32 reference_clock = rdev->clock.spll.reference_freq;
545
546 if (rdev->flags & RADEON_IS_IGP) {
547 if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
548 return reference_clock / 2;
549 } else {
550 if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
551 return reference_clock / 4;
552 }
553 return reference_clock;
554 }
555
556 /**
557 * cik_mm_rdoorbell - read a doorbell dword
558 *
559 * @rdev: radeon_device pointer
560 * @offset: byte offset into the aperture
561 *
562 * Returns the value in the doorbell aperture at the
563 * requested offset (CIK).
564 */
565 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
566 {
567 if (offset < rdev->doorbell.size) {
568 return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
569 } else {
570 DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
571 return 0;
572 }
573 }
574
575 /**
576 * cik_mm_wdoorbell - write a doorbell dword
577 *
578 * @rdev: radeon_device pointer
579 * @offset: byte offset into the aperture
580 * @v: value to write
581 *
582 * Writes @v to the doorbell aperture at the
583 * requested offset (CIK).
584 */
585 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
586 {
587 if (offset < rdev->doorbell.size) {
588 writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
589 } else {
590 DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
591 }
592 }
593
594 #define BONAIRE_IO_MC_REGS_SIZE 36
595
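/* each row below is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair that
 * ci_mc_load_microcode() programs before loading the MC ucode itself.
 */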
596 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
597 {
598 {0x00000070, 0x04400000},
599 {0x00000071, 0x80c01803},
600 {0x00000072, 0x00004004},
601 {0x00000073, 0x00000100},
602 {0x00000074, 0x00ff0000},
603 {0x00000075, 0x34000000},
604 {0x00000076, 0x08000014},
605 {0x00000077, 0x00cc08ec},
606 {0x00000078, 0x00000400},
607 {0x00000079, 0x00000000},
608 {0x0000007a, 0x04090000},
609 {0x0000007c, 0x00000000},
610 {0x0000007e, 0x4408a8e8},
611 {0x0000007f, 0x00000304},
612 {0x00000080, 0x00000000},
613 {0x00000082, 0x00000001},
614 {0x00000083, 0x00000002},
615 {0x00000084, 0xf3e4f400},
616 {0x00000085, 0x052024e3},
617 {0x00000087, 0x00000000},
618 {0x00000088, 0x01000000},
619 {0x0000008a, 0x1c0a0000},
620 {0x0000008b, 0xff010000},
621 {0x0000008d, 0xffffefff},
622 {0x0000008e, 0xfff3efff},
623 {0x0000008f, 0xfff3efbf},
624 {0x00000092, 0xf7ffffff},
625 {0x00000093, 0xffffff7f},
626 {0x00000095, 0x00101101},
627 {0x00000096, 0x00000fff},
628 {0x00000097, 0x00116fff},
629 {0x00000098, 0x60010000},
630 {0x00000099, 0x10010000},
631 {0x0000009a, 0x00006000},
632 {0x0000009b, 0x00001000},
633 {0x0000009f, 0x00b48000}
634 };
635
636 /**
637 * cik_srbm_select - select specific register instances
638 *
639 * @rdev: radeon_device pointer
640 * @me: selected ME (micro engine)
641 * @pipe: pipe
642 * @queue: queue
643 * @vmid: VMID
644 *
645 * Switches the currently active register instances. Some
646 * registers are instanced per VMID, others are instanced per
647 * me/pipe/queue combination.
648 */
649 static void cik_srbm_select(struct radeon_device *rdev,
650 u32 me, u32 pipe, u32 queue, u32 vmid)
651 {
652 u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
653 MEID(me & 0x3) |
654 VMID(vmid & 0xf) |
655 QUEUEID(queue & 0x7));
656 WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
657 }
658
659 /* ucode loading */
660 /**
661 * ci_mc_load_microcode - load MC ucode into the hw
662 *
663 * @rdev: radeon_device pointer
664 *
665 * Load the GDDR MC ucode into the hw (CIK).
666 * Returns 0 on success, error on failure.
667 */
668 static int ci_mc_load_microcode(struct radeon_device *rdev)
669 {
670 const __be32 *fw_data;
671 u32 running, blackout = 0;
672 u32 *io_mc_regs;
673 int i, ucode_size, regs_size;
674
675 if (!rdev->mc_fw)
676 return -EINVAL;
677
678 switch (rdev->family) {
679 case CHIP_BONAIRE:
680 default:
681 io_mc_regs = (u32 *)&bonaire_io_mc_regs;
682 ucode_size = CIK_MC_UCODE_SIZE;
683 regs_size = BONAIRE_IO_MC_REGS_SIZE;
684 break;
685 }
686
687 running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
688
689 if (running == 0) {
690 if (running) {
691 blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
692 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
693 }
694
695 /* reset the engine and set to writable */
696 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
697 WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
698
699 /* load mc io regs */
700 for (i = 0; i < regs_size; i++) {
701 WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
702 WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
703 }
704 /* load the MC ucode */
705 fw_data = (const __be32 *)rdev->mc_fw->data;
706 for (i = 0; i < ucode_size; i++)
707 WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
708
709 /* put the engine back into the active state */
710 WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
711 WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
712 WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
713
714 /* wait for training to complete */
715 for (i = 0; i < rdev->usec_timeout; i++) {
716 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
717 break;
718 udelay(1);
719 }
720 for (i = 0; i < rdev->usec_timeout; i++) {
721 if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
722 break;
723 udelay(1);
724 }
725
726 if (running)
727 WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
728 }
729
730 return 0;
731 }
732
733 /**
734 * cik_init_microcode - load ucode images from disk
735 *
736 * @rdev: radeon_device pointer
737 *
738 * Use the firmware interface to load the ucode images into
739 * the driver (not loaded into hw).
740 * Returns 0 on success, error on failure.
741 */
742 static int cik_init_microcode(struct radeon_device *rdev)
743 {
744 const char *chip_name;
745 size_t pfp_req_size, me_req_size, ce_req_size,
746 mec_req_size, rlc_req_size, mc_req_size,
747 sdma_req_size;
748 char fw_name[30];
749 int err;
750
751 DRM_DEBUG("\n");
752
753 switch (rdev->family) {
754 case CHIP_BONAIRE:
755 chip_name = "BONAIRE";
756 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
757 me_req_size = CIK_ME_UCODE_SIZE * 4;
758 ce_req_size = CIK_CE_UCODE_SIZE * 4;
759 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
760 rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
761 mc_req_size = CIK_MC_UCODE_SIZE * 4;
762 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
763 break;
764 case CHIP_KAVERI:
765 chip_name = "KAVERI";
766 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
767 me_req_size = CIK_ME_UCODE_SIZE * 4;
768 ce_req_size = CIK_CE_UCODE_SIZE * 4;
769 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
770 rlc_req_size = KV_RLC_UCODE_SIZE * 4;
771 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772 break;
773 case CHIP_KABINI:
774 chip_name = "KABINI";
775 pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
776 me_req_size = CIK_ME_UCODE_SIZE * 4;
777 ce_req_size = CIK_CE_UCODE_SIZE * 4;
778 mec_req_size = CIK_MEC_UCODE_SIZE * 4;
779 rlc_req_size = KB_RLC_UCODE_SIZE * 4;
780 sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
781 break;
782 default: BUG();
783 }
784
785 DRM_INFO("Loading %s Microcode\n", chip_name);
786
787 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
788 err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
789 if (err)
790 goto out;
791 if (rdev->pfp_fw->size != pfp_req_size) {
792 printk(KERN_ERR
793 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
794 rdev->pfp_fw->size, fw_name);
795 err = -EINVAL;
796 goto out;
797 }
798
799 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
800 err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
801 if (err)
802 goto out;
803 if (rdev->me_fw->size != me_req_size) {
804 printk(KERN_ERR
805 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
806 rdev->me_fw->size, fw_name);
807 err = -EINVAL;
808 }
809
810 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
811 err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
812 if (err)
813 goto out;
814 if (rdev->ce_fw->size != ce_req_size) {
815 printk(KERN_ERR
816 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
817 rdev->ce_fw->size, fw_name);
818 err = -EINVAL;
819 }
820
821 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
822 err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
823 if (err)
824 goto out;
825 if (rdev->mec_fw->size != mec_req_size) {
826 printk(KERN_ERR
827 "cik_cp: Bogus length %zu in firmware \"%s\"\n",
828 rdev->mec_fw->size, fw_name);
829 err = -EINVAL;
830 }
831
832 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
833 err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
834 if (err)
835 goto out;
836 if (rdev->rlc_fw->size != rlc_req_size) {
837 printk(KERN_ERR
838 "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
839 rdev->rlc_fw->size, fw_name);
840 err = -EINVAL;
841 }
842
843 snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
844 err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
845 if (err)
846 goto out;
847 if (rdev->sdma_fw->size != sdma_req_size) {
848 printk(KERN_ERR
849 "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
850 rdev->sdma_fw->size, fw_name);
851 err = -EINVAL;
852 }
853
854 /* No MC ucode on APUs */
855 if (!(rdev->flags & RADEON_IS_IGP)) {
856 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
857 err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
858 if (err)
859 goto out;
860 if (rdev->mc_fw->size != mc_req_size) {
861 printk(KERN_ERR
862 "cik_mc: Bogus length %zu in firmware \"%s\"\n",
863 rdev->mc_fw->size, fw_name);
864 err = -EINVAL;
865 }
866 }
867
868 out:
869 if (err) {
870 if (err != -EINVAL)
871 printk(KERN_ERR
872 "cik_cp: Failed to load firmware \"%s\"\n",
873 fw_name);
874 release_firmware(rdev->pfp_fw);
875 rdev->pfp_fw = NULL;
876 release_firmware(rdev->me_fw);
877 rdev->me_fw = NULL;
878 release_firmware(rdev->ce_fw);
879 rdev->ce_fw = NULL;
release_firmware(rdev->mec_fw);
rdev->mec_fw = NULL;
880 release_firmware(rdev->rlc_fw);
881 rdev->rlc_fw = NULL;
release_firmware(rdev->sdma_fw);
rdev->sdma_fw = NULL;
882 release_firmware(rdev->mc_fw);
883 rdev->mc_fw = NULL;
884 }
885 return err;
886 }
887
888 /*
889 * Core functions
890 */
891 /**
892 * cik_tiling_mode_table_init - init the hw tiling table
893 *
894 * @rdev: radeon_device pointer
895 *
896 * Starting with SI, the tiling setup is done globally in a
897 * set of 32 tiling modes. Rather than selecting each set of
898 * parameters per surface as on older asics, we just select
899 * which index in the tiling table we want to use, and the
900 * surface uses those parameters (CIK).
901 */
902 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
903 {
904 const u32 num_tile_mode_states = 32;
905 const u32 num_secondary_tile_mode_states = 16;
906 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
907 u32 num_pipe_configs;
908 u32 num_rbs = rdev->config.cik.max_backends_per_se *
909 rdev->config.cik.max_shader_engines;
910
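/* pick the TILE_SPLIT value matching the DRAM row size; it is used for the
 * TILE_SPLIT(split_equal_to_row_size) table entries programmed below.
 */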
911 switch (rdev->config.cik.mem_row_size_in_kb) {
912 case 1:
913 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
914 break;
915 case 2:
916 default:
917 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
918 break;
919 case 4:
920 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
921 break;
922 }
923
924 num_pipe_configs = rdev->config.cik.max_tile_pipes;
925 if (num_pipe_configs > 8)
926 num_pipe_configs = 8; /* ??? */
927
928 if (num_pipe_configs == 8) {
929 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
930 switch (reg_offset) {
931 case 0:
932 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
933 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
934 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
935 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
936 break;
937 case 1:
938 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
939 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
940 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
941 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
942 break;
943 case 2:
944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
946 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
947 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
948 break;
949 case 3:
950 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
952 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
953 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
954 break;
955 case 4:
956 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
958 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
959 TILE_SPLIT(split_equal_to_row_size));
960 break;
961 case 5:
962 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
963 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
964 break;
965 case 6:
966 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
967 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
968 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
969 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
970 break;
971 case 7:
972 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
973 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
974 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
975 TILE_SPLIT(split_equal_to_row_size));
976 break;
977 case 8:
978 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
979 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
980 break;
981 case 9:
982 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
984 break;
985 case 10:
986 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
987 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
988 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
989 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
990 break;
991 case 11:
992 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
993 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
994 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
996 break;
997 case 12:
998 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
999 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1000 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1001 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1002 break;
1003 case 13:
1004 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1005 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1006 break;
1007 case 14:
1008 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1009 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1010 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1012 break;
1013 case 16:
1014 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1015 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1016 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1018 break;
1019 case 17:
1020 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1021 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1022 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1024 break;
1025 case 27:
1026 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1027 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1028 break;
1029 case 28:
1030 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1031 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1032 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1033 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1034 break;
1035 case 29:
1036 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1037 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1038 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1039 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1040 break;
1041 case 30:
1042 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1043 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1045 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1046 break;
1047 default:
1048 gb_tile_moden = 0;
1049 break;
1050 }
1051 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1052 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1053 }
1054 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1055 switch (reg_offset) {
1056 case 0:
1057 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1058 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1059 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1060 NUM_BANKS(ADDR_SURF_16_BANK));
1061 break;
1062 case 1:
1063 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1064 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1065 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1066 NUM_BANKS(ADDR_SURF_16_BANK));
1067 break;
1068 case 2:
1069 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1070 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1071 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1072 NUM_BANKS(ADDR_SURF_16_BANK));
1073 break;
1074 case 3:
1075 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1076 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1077 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1078 NUM_BANKS(ADDR_SURF_16_BANK));
1079 break;
1080 case 4:
1081 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1084 NUM_BANKS(ADDR_SURF_8_BANK));
1085 break;
1086 case 5:
1087 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1088 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1089 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1090 NUM_BANKS(ADDR_SURF_4_BANK));
1091 break;
1092 case 6:
1093 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1094 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1095 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1096 NUM_BANKS(ADDR_SURF_2_BANK));
1097 break;
1098 case 8:
1099 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1100 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1101 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1102 NUM_BANKS(ADDR_SURF_16_BANK));
1103 break;
1104 case 9:
1105 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1106 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1107 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1108 NUM_BANKS(ADDR_SURF_16_BANK));
1109 break;
1110 case 10:
1111 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1112 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1113 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1114 NUM_BANKS(ADDR_SURF_16_BANK));
1115 break;
1116 case 11:
1117 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1118 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1119 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1120 NUM_BANKS(ADDR_SURF_16_BANK));
1121 break;
1122 case 12:
1123 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1124 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1125 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1126 NUM_BANKS(ADDR_SURF_8_BANK));
1127 break;
1128 case 13:
1129 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1130 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1131 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1132 NUM_BANKS(ADDR_SURF_4_BANK));
1133 break;
1134 case 14:
1135 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1136 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1137 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1138 NUM_BANKS(ADDR_SURF_2_BANK));
1139 break;
1140 default:
1141 gb_tile_moden = 0;
1142 break;
1143 }
1144 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1145 }
1146 } else if (num_pipe_configs == 4) {
1147 if (num_rbs == 4) {
1148 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1149 switch (reg_offset) {
1150 case 0:
1151 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1152 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1153 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1154 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1155 break;
1156 case 1:
1157 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1159 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1160 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1161 break;
1162 case 2:
1163 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1164 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1165 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1166 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1167 break;
1168 case 3:
1169 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1171 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1172 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1173 break;
1174 case 4:
1175 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1176 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1177 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1178 TILE_SPLIT(split_equal_to_row_size));
1179 break;
1180 case 5:
1181 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1182 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1183 break;
1184 case 6:
1185 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1186 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1187 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1188 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1189 break;
1190 case 7:
1191 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1192 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1193 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1194 TILE_SPLIT(split_equal_to_row_size));
1195 break;
1196 case 8:
1197 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1198 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1199 break;
1200 case 9:
1201 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1202 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1203 break;
1204 case 10:
1205 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1206 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1207 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1209 break;
1210 case 11:
1211 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1212 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1213 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1214 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1215 break;
1216 case 12:
1217 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1218 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1219 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1221 break;
1222 case 13:
1223 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1224 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1225 break;
1226 case 14:
1227 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1228 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1229 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1230 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1231 break;
1232 case 16:
1233 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1234 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1235 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1237 break;
1238 case 17:
1239 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1240 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1241 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1242 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1243 break;
1244 case 27:
1245 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1246 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1247 break;
1248 case 28:
1249 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1250 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1251 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1252 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1253 break;
1254 case 29:
1255 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1256 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1257 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1258 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1259 break;
1260 case 30:
1261 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1262 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1263 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1264 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1265 break;
1266 default:
1267 gb_tile_moden = 0;
1268 break;
1269 }
1270 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1271 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1272 }
1273 } else if (num_rbs < 4) {
1274 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1275 switch (reg_offset) {
1276 case 0:
1277 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1278 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1279 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1280 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1281 break;
1282 case 1:
1283 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1284 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1285 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1287 break;
1288 case 2:
1289 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1290 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1291 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1292 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1293 break;
1294 case 3:
1295 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1296 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1297 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1299 break;
1300 case 4:
1301 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1302 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1303 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1304 TILE_SPLIT(split_equal_to_row_size));
1305 break;
1306 case 5:
1307 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1308 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1309 break;
1310 case 6:
1311 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1313 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1314 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1315 break;
1316 case 7:
1317 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1319 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1320 TILE_SPLIT(split_equal_to_row_size));
1321 break;
1322 case 8:
1323 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1324 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1325 break;
1326 case 9:
1327 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1328 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1329 break;
1330 case 10:
1331 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1332 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1333 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1334 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1335 break;
1336 case 11:
1337 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1338 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1339 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1340 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1341 break;
1342 case 12:
1343 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1344 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1345 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1346 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1347 break;
1348 case 13:
1349 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1350 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1351 break;
1352 case 14:
1353 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1356 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1357 break;
1358 case 16:
1359 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1361 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1362 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1363 break;
1364 case 17:
1365 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1367 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1368 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1369 break;
1370 case 27:
1371 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1373 break;
1374 case 28:
1375 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1377 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1378 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1379 break;
1380 case 29:
1381 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1382 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1383 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1384 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1385 break;
1386 case 30:
1387 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1388 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1389 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1390 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1391 break;
1392 default:
1393 gb_tile_moden = 0;
1394 break;
1395 }
1396 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1397 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1398 }
1399 }
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401 switch (reg_offset) {
1402 case 0:
1403 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406 NUM_BANKS(ADDR_SURF_16_BANK));
1407 break;
1408 case 1:
1409 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412 NUM_BANKS(ADDR_SURF_16_BANK));
1413 break;
1414 case 2:
1415 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1418 NUM_BANKS(ADDR_SURF_16_BANK));
1419 break;
1420 case 3:
1421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1424 NUM_BANKS(ADDR_SURF_16_BANK));
1425 break;
1426 case 4:
1427 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430 NUM_BANKS(ADDR_SURF_16_BANK));
1431 break;
1432 case 5:
1433 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1436 NUM_BANKS(ADDR_SURF_8_BANK));
1437 break;
1438 case 6:
1439 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442 NUM_BANKS(ADDR_SURF_4_BANK));
1443 break;
1444 case 8:
1445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 NUM_BANKS(ADDR_SURF_16_BANK));
1449 break;
1450 case 9:
1451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 NUM_BANKS(ADDR_SURF_16_BANK));
1455 break;
1456 case 10:
1457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1460 NUM_BANKS(ADDR_SURF_16_BANK));
1461 break;
1462 case 11:
1463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1466 NUM_BANKS(ADDR_SURF_16_BANK));
1467 break;
1468 case 12:
1469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1472 NUM_BANKS(ADDR_SURF_16_BANK));
1473 break;
1474 case 13:
1475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1478 NUM_BANKS(ADDR_SURF_8_BANK));
1479 break;
1480 case 14:
1481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484 NUM_BANKS(ADDR_SURF_4_BANK));
1485 break;
1486 default:
1487 gb_tile_moden = 0;
1488 break;
1489 }
1490 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1491 }
1492 } else if (num_pipe_configs == 2) {
1493 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1494 switch (reg_offset) {
1495 case 0:
1496 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1497 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1498 PIPE_CONFIG(ADDR_SURF_P2) |
1499 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1500 break;
1501 case 1:
1502 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1504 PIPE_CONFIG(ADDR_SURF_P2) |
1505 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1506 break;
1507 case 2:
1508 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1510 PIPE_CONFIG(ADDR_SURF_P2) |
1511 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1512 break;
1513 case 3:
1514 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1516 PIPE_CONFIG(ADDR_SURF_P2) |
1517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1518 break;
1519 case 4:
1520 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1522 PIPE_CONFIG(ADDR_SURF_P2) |
1523 TILE_SPLIT(split_equal_to_row_size));
1524 break;
1525 case 5:
1526 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1527 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1528 break;
1529 case 6:
1530 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1531 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1532 PIPE_CONFIG(ADDR_SURF_P2) |
1533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1534 break;
1535 case 7:
1536 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1537 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1538 PIPE_CONFIG(ADDR_SURF_P2) |
1539 TILE_SPLIT(split_equal_to_row_size));
1540 break;
1541 case 8:
1542 gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1543 break;
1544 case 9:
1545 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1546 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1547 break;
1548 case 10:
1549 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 PIPE_CONFIG(ADDR_SURF_P2) |
1552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1553 break;
1554 case 11:
1555 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557 PIPE_CONFIG(ADDR_SURF_P2) |
1558 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1559 break;
1560 case 12:
1561 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1562 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563 PIPE_CONFIG(ADDR_SURF_P2) |
1564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1565 break;
1566 case 13:
1567 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1569 break;
1570 case 14:
1571 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1572 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1573 PIPE_CONFIG(ADDR_SURF_P2) |
1574 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1575 break;
1576 case 16:
1577 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1578 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1579 PIPE_CONFIG(ADDR_SURF_P2) |
1580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1581 break;
1582 case 17:
1583 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1584 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1585 PIPE_CONFIG(ADDR_SURF_P2) |
1586 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1587 break;
1588 case 27:
1589 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1590 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1591 break;
1592 case 28:
1593 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1594 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1595 PIPE_CONFIG(ADDR_SURF_P2) |
1596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1597 break;
1598 case 29:
1599 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1600 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1601 PIPE_CONFIG(ADDR_SURF_P2) |
1602 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1603 break;
1604 case 30:
1605 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1606 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1607 PIPE_CONFIG(ADDR_SURF_P2) |
1608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1609 break;
1610 default:
1611 gb_tile_moden = 0;
1612 break;
1613 }
1614 rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1615 WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1616 }
1617 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1618 switch (reg_offset) {
1619 case 0:
1620 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1621 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1622 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1623 NUM_BANKS(ADDR_SURF_16_BANK));
1624 break;
1625 case 1:
1626 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1627 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1628 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1629 NUM_BANKS(ADDR_SURF_16_BANK));
1630 break;
1631 case 2:
1632 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1633 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1634 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1635 NUM_BANKS(ADDR_SURF_16_BANK));
1636 break;
1637 case 3:
1638 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1641 NUM_BANKS(ADDR_SURF_16_BANK));
1642 break;
1643 case 4:
1644 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1645 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1646 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1647 NUM_BANKS(ADDR_SURF_16_BANK));
1648 break;
1649 case 5:
1650 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1653 NUM_BANKS(ADDR_SURF_16_BANK));
1654 break;
1655 case 6:
1656 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1657 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1658 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1659 NUM_BANKS(ADDR_SURF_8_BANK));
1660 break;
1661 case 8:
1662 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1665 NUM_BANKS(ADDR_SURF_16_BANK));
1666 break;
1667 case 9:
1668 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1669 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1670 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1671 NUM_BANKS(ADDR_SURF_16_BANK));
1672 break;
1673 case 10:
1674 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1677 NUM_BANKS(ADDR_SURF_16_BANK));
1678 break;
1679 case 11:
1680 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1681 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1682 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1683 NUM_BANKS(ADDR_SURF_16_BANK));
1684 break;
1685 case 12:
1686 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1687 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1688 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1689 NUM_BANKS(ADDR_SURF_16_BANK));
1690 break;
1691 case 13:
1692 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1693 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1694 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1695 NUM_BANKS(ADDR_SURF_16_BANK));
1696 break;
1697 case 14:
1698 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1699 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1700 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1701 NUM_BANKS(ADDR_SURF_8_BANK));
1702 break;
1703 default:
1704 gb_tile_moden = 0;
1705 break;
1706 }
1707 WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1708 }
1709 } else
1710 DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1711 }
1712
1713 /**
1714 * cik_select_se_sh - select which SE, SH to address
1715 *
1716 * @rdev: radeon_device pointer
1717 * @se_num: shader engine to address
1718 * @sh_num: sh block to address
1719 *
1720 * Select which SE, SH combinations to address. Certain
1721 * registers are instanced per SE or SH. 0xffffffff means
1722 * broadcast to all SEs or SHs (CIK).
1723 */
1724 static void cik_select_se_sh(struct radeon_device *rdev,
1725 u32 se_num, u32 sh_num)
1726 {
1727 u32 data = INSTANCE_BROADCAST_WRITES;
1728
1729 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1730 data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1731 else if (se_num == 0xffffffff)
1732 data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1733 else if (sh_num == 0xffffffff)
1734 data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1735 else
1736 data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1737 WREG32(GRBM_GFX_INDEX, data);
1738 }
1739
1740 /**
1741 * cik_create_bitmask - create a bitmask
1742 *
1743 * @bit_width: length of the mask
1744 *
1745 * create a variable length bit mask (CIK).
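 * For example, cik_create_bitmask(4) returns 0xf.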
1746 * Returns the bitmask.
1747 */
1748 static u32 cik_create_bitmask(u32 bit_width)
1749 {
1750 u32 i, mask = 0;
1751
1752 for (i = 0; i < bit_width; i++) {
1753 mask <<= 1;
1754 mask |= 1;
1755 }
1756 return mask;
1757 }
1758
1759 /**
1760 * cik_get_rb_disabled - compute the bitmask of disabled RBs
1761 *
1762 * @rdev: radeon_device pointer
1763 * @max_rb_num: max RBs (render backends) for the asic
1764 * @se_num: number of SEs (shader engines) for the asic
1765 * @sh_per_se: number of SH blocks per SE for the asic
1766 *
1767 * Calculates the bitmask of disabled RBs (CIK).
1768 * Returns the disabled RB bitmask.
1769 */
1770 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1771 u32 max_rb_num, u32 se_num,
1772 u32 sh_per_se)
1773 {
1774 u32 data, mask;
1775
1776 data = RREG32(CC_RB_BACKEND_DISABLE);
1777 if (data & 1)
1778 data &= BACKEND_DISABLE_MASK;
1779 else
1780 data = 0;
1781 data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1782
1783 data >>= BACKEND_DISABLE_SHIFT;
1784
1785 mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1786
1787 return data & mask;
1788 }
1789
1790 /**
1791 * cik_setup_rb - setup the RBs on the asic
1792 *
1793 * @rdev: radeon_device pointer
1794 * @se_num: number of SEs (shader engines) for the asic
1795 * @sh_per_se: number of SH blocks per SE for the asic
1796 * @max_rb_num: max RBs (render backends) for the asic
1797 *
1798 * Configures per-SE/SH RB registers (CIK).
1799 */
1800 static void cik_setup_rb(struct radeon_device *rdev,
1801 u32 se_num, u32 sh_per_se,
1802 u32 max_rb_num)
1803 {
1804 int i, j;
1805 u32 data, mask;
1806 u32 disabled_rbs = 0;
1807 u32 enabled_rbs = 0;
1808
1809 for (i = 0; i < se_num; i++) {
1810 for (j = 0; j < sh_per_se; j++) {
1811 cik_select_se_sh(rdev, i, j);
1812 data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1813 disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1814 }
1815 }
1816 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1817
1818 mask = 1;
1819 for (i = 0; i < max_rb_num; i++) {
1820 if (!(disabled_rbs & mask))
1821 enabled_rbs |= mask;
1822 mask <<= 1;
1823 }
1824
1825 for (i = 0; i < se_num; i++) {
1826 cik_select_se_sh(rdev, i, 0xffffffff);
1827 data = 0;
1828 for (j = 0; j < sh_per_se; j++) {
1829 switch (enabled_rbs & 3) {
1830 case 1:
1831 data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1832 break;
1833 case 2:
1834 data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1835 break;
1836 case 3:
1837 default:
1838 data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1839 break;
1840 }
1841 enabled_rbs >>= 2;
1842 }
1843 WREG32(PA_SC_RASTER_CONFIG, data);
1844 }
1845 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1846 }
1847
1848 /**
1849 * cik_gpu_init - setup the 3D engine
1850 *
1851 * @rdev: radeon_device pointer
1852 *
1853 * Configures the 3D engine and tiling configuration
1854 * registers so that the 3D engine is usable.
1855 */
1856 static void cik_gpu_init(struct radeon_device *rdev)
1857 {
1858 u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1859 u32 mc_shared_chmap, mc_arb_ramcfg;
1860 u32 hdp_host_path_cntl;
1861 u32 tmp;
1862 int i, j;
1863
1864 switch (rdev->family) {
1865 case CHIP_BONAIRE:
1866 rdev->config.cik.max_shader_engines = 2;
1867 rdev->config.cik.max_tile_pipes = 4;
1868 rdev->config.cik.max_cu_per_sh = 7;
1869 rdev->config.cik.max_sh_per_se = 1;
1870 rdev->config.cik.max_backends_per_se = 2;
1871 rdev->config.cik.max_texture_channel_caches = 4;
1872 rdev->config.cik.max_gprs = 256;
1873 rdev->config.cik.max_gs_threads = 32;
1874 rdev->config.cik.max_hw_contexts = 8;
1875
1876 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1877 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1878 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1879 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1880 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1881 break;
1882 case CHIP_KAVERI:
1883 /* TODO */
1884 break;
1885 case CHIP_KABINI:
1886 default:
1887 rdev->config.cik.max_shader_engines = 1;
1888 rdev->config.cik.max_tile_pipes = 2;
1889 rdev->config.cik.max_cu_per_sh = 2;
1890 rdev->config.cik.max_sh_per_se = 1;
1891 rdev->config.cik.max_backends_per_se = 1;
1892 rdev->config.cik.max_texture_channel_caches = 2;
1893 rdev->config.cik.max_gprs = 256;
1894 rdev->config.cik.max_gs_threads = 16;
1895 rdev->config.cik.max_hw_contexts = 8;
1896
1897 rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1898 rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1899 rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1900 rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1901 gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1902 break;
1903 }
1904
1905 /* Initialize HDP */
1906 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1907 WREG32((0x2c14 + j), 0x00000000);
1908 WREG32((0x2c18 + j), 0x00000000);
1909 WREG32((0x2c1c + j), 0x00000000);
1910 WREG32((0x2c20 + j), 0x00000000);
1911 WREG32((0x2c24 + j), 0x00000000);
1912 }
1913
1914 WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1915
1916 WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1917
1918 mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1919 mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1920
1921 rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1922 rdev->config.cik.mem_max_burst_length_bytes = 256;
1923 tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1924 rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1925 if (rdev->config.cik.mem_row_size_in_kb > 4)
1926 rdev->config.cik.mem_row_size_in_kb = 4;
1927 /* XXX use MC settings? */
1928 rdev->config.cik.shader_engine_tile_size = 32;
1929 rdev->config.cik.num_gpus = 1;
1930 rdev->config.cik.multi_gpu_tile_size = 64;
1931
1932 /* fix up row size */
1933 gb_addr_config &= ~ROW_SIZE_MASK;
1934 switch (rdev->config.cik.mem_row_size_in_kb) {
1935 case 1:
1936 default:
1937 gb_addr_config |= ROW_SIZE(0);
1938 break;
1939 case 2:
1940 gb_addr_config |= ROW_SIZE(1);
1941 break;
1942 case 4:
1943 gb_addr_config |= ROW_SIZE(2);
1944 break;
1945 }
1946
1947 /* setup tiling info dword. gb_addr_config is not adequate since it does
1948 * not have bank info, so create a custom tiling dword.
1949 * bits 3:0 num_pipes
1950 * bits 7:4 num_banks
1951 * bits 11:8 group_size
1952 * bits 15:12 row_size
1953 */
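/* e.g. a 4 pipe Bonaire encodes 2 in bits 3:0 via the num_tile_pipes
 * switch below; the bank, group size and row size fields are filled in
 * from MC_ARB_RAMCFG and GB_ADDR_CONFIG.
 */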
1954 rdev->config.cik.tile_config = 0;
1955 switch (rdev->config.cik.num_tile_pipes) {
1956 case 1:
1957 rdev->config.cik.tile_config |= (0 << 0);
1958 break;
1959 case 2:
1960 rdev->config.cik.tile_config |= (1 << 0);
1961 break;
1962 case 4:
1963 rdev->config.cik.tile_config |= (2 << 0);
1964 break;
1965 case 8:
1966 default:
1967 /* XXX what about 12? */
1968 rdev->config.cik.tile_config |= (3 << 0);
1969 break;
1970 }
1971 if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1972 rdev->config.cik.tile_config |= 1 << 4;
1973 else
1974 rdev->config.cik.tile_config |= 0 << 4;
1975 rdev->config.cik.tile_config |=
1976 ((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1977 rdev->config.cik.tile_config |=
1978 ((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1979
1980 WREG32(GB_ADDR_CONFIG, gb_addr_config);
1981 WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1982 WREG32(DMIF_ADDR_CALC, gb_addr_config);
1983 WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1984 WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1985 WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1986 WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1987 WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1988
1989 cik_tiling_mode_table_init(rdev);
1990
1991 cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
1992 rdev->config.cik.max_sh_per_se,
1993 rdev->config.cik.max_backends_per_se);
1994
1995 /* set HW defaults for 3D engine */
1996 WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
1997
1998 WREG32(SX_DEBUG_1, 0x20);
1999
2000 WREG32(TA_CNTL_AUX, 0x00010000);
2001
2002 tmp = RREG32(SPI_CONFIG_CNTL);
2003 tmp |= 0x03000000;
2004 WREG32(SPI_CONFIG_CNTL, tmp);
2005
2006 WREG32(SQ_CONFIG, 1);
2007
2008 WREG32(DB_DEBUG, 0);
2009
2010 tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2011 tmp |= 0x00000400;
2012 WREG32(DB_DEBUG2, tmp);
2013
2014 tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2015 tmp |= 0x00020200;
2016 WREG32(DB_DEBUG3, tmp);
2017
2018 tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2019 tmp |= 0x00018208;
2020 WREG32(CB_HW_CONTROL, tmp);
2021
2022 WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2023
2024 WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2025 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2026 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2027 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2028
2029 WREG32(VGT_NUM_INSTANCES, 1);
2030
2031 WREG32(CP_PERFMON_CNTL, 0);
2032
2033 WREG32(SQ_CONFIG, 0);
2034
2035 WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2036 FORCE_EOV_MAX_REZ_CNT(255)));
2037
2038 WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2039 AUTO_INVLD_EN(ES_AND_GS_AUTO));
2040
2041 WREG32(VGT_GS_VERTEX_REUSE, 16);
2042 WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2043
2044 tmp = RREG32(HDP_MISC_CNTL);
2045 tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2046 WREG32(HDP_MISC_CNTL, tmp);
2047
2048 hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2049 WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2050
2051 WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2052 WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2053
2054 udelay(50);
2055 }
2056
2057 /*
2058 * GPU scratch register helper functions.
2059 */
2060 /**
2061 * cik_scratch_init - setup driver info for CP scratch regs
2062 *
2063 * @rdev: radeon_device pointer
2064 *
2065 * Set up the number and offset of the CP scratch registers.
2066 * NOTE: use of CP scratch registers is a legacy interface and
2067 * is not used by default on newer asics (r6xx+). On newer asics,
2068 * memory buffers are used for fences rather than scratch regs.
2069 */
2070 static void cik_scratch_init(struct radeon_device *rdev)
2071 {
2072 int i;
2073
2074 rdev->scratch.num_reg = 7;
2075 rdev->scratch.reg_base = SCRATCH_REG0;
2076 for (i = 0; i < rdev->scratch.num_reg; i++) {
2077 rdev->scratch.free[i] = true;
2078 rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2079 }
2080 }
2081
2082 /**
2083 * cik_ring_test - basic gfx ring test
2084 *
2085 * @rdev: radeon_device pointer
2086 * @ring: radeon_ring structure holding ring information
2087 *
2088 * Allocate a scratch register and write to it using the gfx ring (CIK).
2089 * Provides a basic gfx ring test to verify that the ring is working.
2090 * Used by cik_cp_gfx_resume().
2091 * Returns 0 on success, error on failure.
2092 */
2093 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2094 {
2095 uint32_t scratch;
2096 uint32_t tmp = 0;
2097 unsigned i;
2098 int r;
2099
2100 r = radeon_scratch_get(rdev, &scratch);
2101 if (r) {
2102 DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2103 return r;
2104 }
2105 WREG32(scratch, 0xCAFEDEAD);
2106 r = radeon_ring_lock(rdev, ring, 3);
2107 if (r) {
2108 DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2109 radeon_scratch_free(rdev, scratch);
2110 return r;
2111 }
2112 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2113 radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2114 radeon_ring_write(ring, 0xDEADBEEF);
2115 radeon_ring_unlock_commit(rdev, ring);
2116
2117 for (i = 0; i < rdev->usec_timeout; i++) {
2118 tmp = RREG32(scratch);
2119 if (tmp == 0xDEADBEEF)
2120 break;
2121 DRM_UDELAY(1);
2122 }
2123 if (i < rdev->usec_timeout) {
2124 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2125 } else {
2126 DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2127 ring->idx, scratch, tmp);
2128 r = -EINVAL;
2129 }
2130 radeon_scratch_free(rdev, scratch);
2131 return r;
2132 }
2133
2134 /**
2135 * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2136 *
2137 * @rdev: radeon_device pointer
2138 * @fence: radeon fence object
2139 *
2140 * Emits a fence sequence number on the gfx ring and flushes
2141 * GPU caches.
2142 */
2143 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2144 struct radeon_fence *fence)
2145 {
2146 struct radeon_ring *ring = &rdev->ring[fence->ring];
2147 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2148
2149 /* EVENT_WRITE_EOP - flush caches, send int */
2150 radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2151 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2152 EOP_TC_ACTION_EN |
2153 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2154 EVENT_INDEX(5)));
2155 radeon_ring_write(ring, addr & 0xfffffffc);
2156 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2157 radeon_ring_write(ring, fence->seq);
2158 radeon_ring_write(ring, 0);
2159 /* HDP flush */
2160 /* We should be using the new WAIT_REG_MEM special op packet here
2161 * but it causes the CP to hang
2162 */
2163 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2164 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2165 WRITE_DATA_DST_SEL(0)));
2166 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2167 radeon_ring_write(ring, 0);
2168 radeon_ring_write(ring, 0);
2169 }
2170
2171 /**
2172 * cik_fence_compute_ring_emit - emit a fence on the compute ring
2173 *
2174 * @rdev: radeon_device pointer
2175 * @fence: radeon fence object
2176 *
2177 * Emits a fence sequence number on the compute ring and flushes
2178 * GPU caches.
2179 */
2180 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2181 struct radeon_fence *fence)
2182 {
2183 struct radeon_ring *ring = &rdev->ring[fence->ring];
2184 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2185
2186 /* RELEASE_MEM - flush caches, send int */
2187 radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2188 radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2189 EOP_TC_ACTION_EN |
2190 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2191 EVENT_INDEX(5)));
2192 radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2193 radeon_ring_write(ring, addr & 0xfffffffc);
2194 radeon_ring_write(ring, upper_32_bits(addr));
2195 radeon_ring_write(ring, fence->seq);
2196 radeon_ring_write(ring, 0);
2197 /* HDP flush */
2198 /* We should be using the new WAIT_REG_MEM special op packet here
2199 * but it causes the CP to hang
2200 */
2201 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2202 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2203 WRITE_DATA_DST_SEL(0)));
2204 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2205 radeon_ring_write(ring, 0);
2206 radeon_ring_write(ring, 0);
2207 }
2208
2209 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2210 struct radeon_ring *ring,
2211 struct radeon_semaphore *semaphore,
2212 bool emit_wait)
2213 {
2214 uint64_t addr = semaphore->gpu_addr;
2215 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2216
2217 radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2218 radeon_ring_write(ring, addr & 0xffffffff);
2219 radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2220 }
2221
2222 /*
2223 * IB stuff
2224 */
2225 /**
2226 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2227 *
2228 * @rdev: radeon_device pointer
2229 * @ib: radeon indirect buffer object
2230 *
2231 * Emits a DE (drawing engine) or CE (constant engine) IB
2232 * on the gfx ring. IBs are usually generated by userspace
2233 * acceleration drivers and submitted to the kernel for
2234 * scheduling on the ring. This function schedules the IB
2235 * on the gfx ring for execution by the GPU.
2236 */
2237 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2238 {
2239 struct radeon_ring *ring = &rdev->ring[ib->ring];
2240 u32 header, control = INDIRECT_BUFFER_VALID;
2241
2242 if (ib->is_const_ib) {
2243 /* set switch buffer packet before const IB */
2244 radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2245 radeon_ring_write(ring, 0);
2246
2247 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2248 } else {
2249 u32 next_rptr;
2250 if (ring->rptr_save_reg) {
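/* next_rptr accounts for the 3 dword SET_UCONFIG_REG write below plus
 * the 4 dword IB packet emitted at the end of this function */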
2251 next_rptr = ring->wptr + 3 + 4;
2252 radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2253 radeon_ring_write(ring, ((ring->rptr_save_reg -
2254 PACKET3_SET_UCONFIG_REG_START) >> 2));
2255 radeon_ring_write(ring, next_rptr);
2256 } else if (rdev->wb.enabled) {
2257 next_rptr = ring->wptr + 5 + 4;
2258 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2259 radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2260 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2261 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2262 radeon_ring_write(ring, next_rptr);
2263 }
2264
2265 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2266 }
2267
2268 control |= ib->length_dw |
2269 (ib->vm ? (ib->vm->id << 24) : 0);
2270
2271 radeon_ring_write(ring, header);
2272 radeon_ring_write(ring,
2273 #ifdef __BIG_ENDIAN
2274 (2 << 0) |
2275 #endif
2276 (ib->gpu_addr & 0xFFFFFFFC));
2277 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2278 radeon_ring_write(ring, control);
2279 }
2280
2281 /**
2282 * cik_ib_test - basic gfx ring IB test
2283 *
2284 * @rdev: radeon_device pointer
2285 * @ring: radeon_ring structure holding ring information
2286 *
2287 * Allocate an IB and execute it on the gfx ring (CIK).
2288 * Provides a basic gfx ring test to verify that IBs are working.
2289 * Returns 0 on success, error on failure.
2290 */
2291 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2292 {
2293 struct radeon_ib ib;
2294 uint32_t scratch;
2295 uint32_t tmp = 0;
2296 unsigned i;
2297 int r;
2298
2299 r = radeon_scratch_get(rdev, &scratch);
2300 if (r) {
2301 DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2302 return r;
2303 }
2304 WREG32(scratch, 0xCAFEDEAD);
2305 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2306 if (r) {
2307 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2308 return r;
2309 }
2310 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2311 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2312 ib.ptr[2] = 0xDEADBEEF;
2313 ib.length_dw = 3;
2314 r = radeon_ib_schedule(rdev, &ib, NULL);
2315 if (r) {
2316 radeon_scratch_free(rdev, scratch);
2317 radeon_ib_free(rdev, &ib);
2318 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2319 return r;
2320 }
2321 r = radeon_fence_wait(ib.fence, false);
2322 if (r) {
2323 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2324 return r;
2325 }
2326 for (i = 0; i < rdev->usec_timeout; i++) {
2327 tmp = RREG32(scratch);
2328 if (tmp == 0xDEADBEEF)
2329 break;
2330 DRM_UDELAY(1);
2331 }
2332 if (i < rdev->usec_timeout) {
2333 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2334 } else {
2335 DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2336 scratch, tmp);
2337 r = -EINVAL;
2338 }
2339 radeon_scratch_free(rdev, scratch);
2340 radeon_ib_free(rdev, &ib);
2341 return r;
2342 }
2343
2344 /*
2345 * CP.
2346 * On CIK, gfx and compute now have independent command processors.
2347 *
2348 * GFX
2349 * Gfx consists of a single ring and can process both gfx jobs and
2350 * compute jobs. The gfx CP consists of three microengines (ME):
2351 * PFP - Pre-Fetch Parser
2352 * ME - Micro Engine
2353 * CE - Constant Engine
2354 * The PFP and ME make up what is considered the Drawing Engine (DE).
2355 * The CE is an asynchronous engine used for updating buffer descriptors
2356 * used by the DE so that they can be loaded into cache in parallel
2357 * while the DE is processing state update packets.
2358 *
2359 * Compute
2360 * The compute CP consists of two microengines (ME):
2361 * MEC1 - Compute MicroEngine 1
2362 * MEC2 - Compute MicroEngine 2
2363 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2364 * The queues are exposed to userspace and are programmed directly
2365 * by the compute runtime.
2366 */
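/*
 * For example, Kaveri has 2 MECs, so it exposes 2 * 4 * 8 = 64 compute
 * queues; Bonaire and Kabini have 1 MEC for 32 queues (see cik_mec_init()).
 */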
2367 /**
2368 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2369 *
2370 * @rdev: radeon_device pointer
2371 * @enable: enable or disable the MEs
2372 *
2373 * Halts or unhalts the gfx MEs.
2374 */
2375 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2376 {
2377 if (enable)
2378 WREG32(CP_ME_CNTL, 0);
2379 else {
2380 WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2381 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2382 }
2383 udelay(50);
2384 }
2385
2386 /**
2387 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2388 *
2389 * @rdev: radeon_device pointer
2390 *
2391 * Loads the gfx PFP, ME, and CE ucode.
2392 * Returns 0 for success, -EINVAL if the ucode is not available.
2393 */
2394 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2395 {
2396 const __be32 *fw_data;
2397 int i;
2398
2399 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2400 return -EINVAL;
2401
2402 cik_cp_gfx_enable(rdev, false);
2403
2404 /* PFP */
2405 fw_data = (const __be32 *)rdev->pfp_fw->data;
2406 WREG32(CP_PFP_UCODE_ADDR, 0);
2407 for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2408 WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2409 WREG32(CP_PFP_UCODE_ADDR, 0);
2410
2411 /* CE */
2412 fw_data = (const __be32 *)rdev->ce_fw->data;
2413 WREG32(CP_CE_UCODE_ADDR, 0);
2414 for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2415 WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2416 WREG32(CP_CE_UCODE_ADDR, 0);
2417
2418 /* ME */
2419 fw_data = (const __be32 *)rdev->me_fw->data;
2420 WREG32(CP_ME_RAM_WADDR, 0);
2421 for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2422 WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2423 WREG32(CP_ME_RAM_WADDR, 0);
2424
2425 WREG32(CP_PFP_UCODE_ADDR, 0);
2426 WREG32(CP_CE_UCODE_ADDR, 0);
2427 WREG32(CP_ME_RAM_WADDR, 0);
2428 WREG32(CP_ME_RAM_RADDR, 0);
2429 return 0;
2430 }
2431
2432 /**
2433 * cik_cp_gfx_start - start the gfx ring
2434 *
2435 * @rdev: radeon_device pointer
2436 *
2437 * Enables the ring and loads the clear state context and other
2438 * packets required to init the ring.
2439 * Returns 0 for success, error for failure.
2440 */
2441 static int cik_cp_gfx_start(struct radeon_device *rdev)
2442 {
2443 struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2444 int r, i;
2445
2446 /* init the CP */
2447 WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2448 WREG32(CP_ENDIAN_SWAP, 0);
2449 WREG32(CP_DEVICE_ID, 1);
2450
2451 cik_cp_gfx_enable(rdev, true);
2452
2453 r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2454 if (r) {
2455 DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2456 return r;
2457 }
2458
2459 /* init the CE partitions. CE only used for gfx on CIK */
2460 radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2461 radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2462 radeon_ring_write(ring, 0xc000);
2463 radeon_ring_write(ring, 0xc000);
2464
2465 /* setup clear context state */
2466 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2467 radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2468
2469 radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2470 radeon_ring_write(ring, 0x80000000);
2471 radeon_ring_write(ring, 0x80000000);
2472
2473 for (i = 0; i < cik_default_size; i++)
2474 radeon_ring_write(ring, cik_default_state[i]);
2475
2476 radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2477 radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2478
2479 /* set clear context state */
2480 radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2481 radeon_ring_write(ring, 0);
2482
2483 radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2484 radeon_ring_write(ring, 0x00000316);
2485 radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2486 radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2487
2488 radeon_ring_unlock_commit(rdev, ring);
2489
2490 return 0;
2491 }
2492
2493 /**
2494 * cik_cp_gfx_fini - stop the gfx ring
2495 *
2496 * @rdev: radeon_device pointer
2497 *
2498 * Stop the gfx ring and tear down the driver ring
2499 * info.
2500 */
2501 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2502 {
2503 cik_cp_gfx_enable(rdev, false);
2504 radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2505 }
2506
2507 /**
2508 * cik_cp_gfx_resume - setup the gfx ring buffer registers
2509 *
2510 * @rdev: radeon_device pointer
2511 *
2512 * Program the location and size of the gfx ring buffer
2513 * and test it to make sure it's working.
2514 * Returns 0 for success, error for failure.
2515 */
2516 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2517 {
2518 struct radeon_ring *ring;
2519 u32 tmp;
2520 u32 rb_bufsz;
2521 u64 rb_addr;
2522 int r;
2523
2524 WREG32(CP_SEM_WAIT_TIMER, 0x0);
2525 WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2526
2527 /* Set the write pointer delay */
2528 WREG32(CP_RB_WPTR_DELAY, 0);
2529
2530 /* set the RB to use vmid 0 */
2531 WREG32(CP_RB_VMID, 0);
2532
2533 WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2534
2535 /* ring 0 - compute and gfx */
2536 /* Set ring buffer size */
2537 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2538 rb_bufsz = drm_order(ring->ring_size / 8);
2539 tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2540 #ifdef __BIG_ENDIAN
2541 tmp |= BUF_SWAP_32BIT;
2542 #endif
2543 WREG32(CP_RB0_CNTL, tmp);
2544
2545 /* Initialize the ring buffer's read and write pointers */
2546 WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2547 ring->wptr = 0;
2548 WREG32(CP_RB0_WPTR, ring->wptr);
2549
2550 /* set the wb address whether it's enabled or not */
2551 WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2552 WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2553
2554 /* scratch register shadowing is no longer supported */
2555 WREG32(SCRATCH_UMSK, 0);
2556
2557 if (!rdev->wb.enabled)
2558 tmp |= RB_NO_UPDATE;
2559
2560 mdelay(1);
2561 WREG32(CP_RB0_CNTL, tmp);
2562
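/* the RB base registers take the ring address in units of 256 bytes, hence the >> 8 */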
2563 rb_addr = ring->gpu_addr >> 8;
2564 WREG32(CP_RB0_BASE, rb_addr);
2565 WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2566
2567 ring->rptr = RREG32(CP_RB0_RPTR);
2568
2569 /* start the ring */
2570 cik_cp_gfx_start(rdev);
2571 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2572 r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2573 if (r) {
2574 rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2575 return r;
2576 }
2577 return 0;
2578 }
2579
2580 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2581 struct radeon_ring *ring)
2582 {
2583 u32 rptr;
2584
2587 if (rdev->wb.enabled) {
2588 rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2589 } else {
2590 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2591 rptr = RREG32(CP_HQD_PQ_RPTR);
2592 cik_srbm_select(rdev, 0, 0, 0, 0);
2593 }
2594 rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2595
2596 return rptr;
2597 }
2598
2599 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2600 struct radeon_ring *ring)
2601 {
2602 u32 wptr;
2603
2604 if (rdev->wb.enabled) {
2605 wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2606 } else {
2607 cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2608 wptr = RREG32(CP_HQD_PQ_WPTR);
2609 cik_srbm_select(rdev, 0, 0, 0, 0);
2610 }
2611 wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2612
2613 return wptr;
2614 }
2615
2616 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2617 struct radeon_ring *ring)
2618 {
2619 u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2620
2621 rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2622 WDOORBELL32(ring->doorbell_offset, wptr);
2623 }
2624
2625 /**
2626 * cik_cp_compute_enable - enable/disable the compute CP MEs
2627 *
2628 * @rdev: radeon_device pointer
2629 * @enable: enable or disable the MEs
2630 *
2631 * Halts or unhalts the compute MEs.
2632 */
2633 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2634 {
2635 if (enable)
2636 WREG32(CP_MEC_CNTL, 0);
2637 else
2638 WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2639 udelay(50);
2640 }
2641
2642 /**
2643 * cik_cp_compute_load_microcode - load the compute CP ME ucode
2644 *
2645 * @rdev: radeon_device pointer
2646 *
2647 * Loads the compute MEC1&2 ucode.
2648 * Returns 0 for success, -EINVAL if the ucode is not available.
2649 */
2650 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2651 {
2652 const __be32 *fw_data;
2653 int i;
2654
2655 if (!rdev->mec_fw)
2656 return -EINVAL;
2657
2658 cik_cp_compute_enable(rdev, false);
2659
2660 /* MEC1 */
2661 fw_data = (const __be32 *)rdev->mec_fw->data;
2662 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2663 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2664 WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2665 WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2666
2667 if (rdev->family == CHIP_KAVERI) {
2668 /* MEC2 */
2669 fw_data = (const __be32 *)rdev->mec_fw->data;
2670 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2671 for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2672 WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2673 WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2674 }
2675
2676 return 0;
2677 }
2678
2679 /**
2680 * cik_cp_compute_start - start the compute queues
2681 *
2682 * @rdev: radeon_device pointer
2683 *
2684 * Enable the compute queues.
2685 * Returns 0 for success, error for failure.
2686 */
2687 static int cik_cp_compute_start(struct radeon_device *rdev)
2688 {
2689 cik_cp_compute_enable(rdev, true);
2690
2691 return 0;
2692 }
2693
2694 /**
2695 * cik_cp_compute_fini - stop the compute queues
2696 *
2697 * @rdev: radeon_device pointer
2698 *
2699 * Stop the compute queues and tear down the driver queue
2700 * info.
2701 */
2702 static void cik_cp_compute_fini(struct radeon_device *rdev)
2703 {
2704 int i, idx, r;
2705
2706 cik_cp_compute_enable(rdev, false);
2707
2708 for (i = 0; i < 2; i++) {
2709 if (i == 0)
2710 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2711 else
2712 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2713
2714 if (rdev->ring[idx].mqd_obj) {
2715 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2716 if (unlikely(r != 0))
2717 dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2718
2719 radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2720 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2721
2722 radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2723 rdev->ring[idx].mqd_obj = NULL;
2724 }
2725 }
2726 }
2727
2728 static void cik_mec_fini(struct radeon_device *rdev)
2729 {
2730 int r;
2731
2732 if (rdev->mec.hpd_eop_obj) {
2733 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2734 if (unlikely(r != 0))
2735 dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2736 radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2737 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2738
2739 radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2740 rdev->mec.hpd_eop_obj = NULL;
2741 }
2742 }
2743
2744 #define MEC_HPD_SIZE 2048
2745
2746 static int cik_mec_init(struct radeon_device *rdev)
2747 {
2748 int r;
2749 u32 *hpd;
2750
2751 /*
2752 * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2753 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2754 */
2755 if (rdev->family == CHIP_KAVERI)
2756 rdev->mec.num_mec = 2;
2757 else
2758 rdev->mec.num_mec = 1;
2759 rdev->mec.num_pipe = 4;
2760 rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2761
2762 if (rdev->mec.hpd_eop_obj == NULL) {
2763 r = radeon_bo_create(rdev,
2764 rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2765 PAGE_SIZE, true,
2766 RADEON_GEM_DOMAIN_GTT, NULL,
2767 &rdev->mec.hpd_eop_obj);
2768 if (r) {
2769 dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2770 return r;
2771 }
2772 }
2773
2774 r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2775 if (unlikely(r != 0)) {
2776 cik_mec_fini(rdev);
2777 return r;
2778 }
2779 r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2780 &rdev->mec.hpd_eop_gpu_addr);
2781 if (r) {
2782 dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2783 cik_mec_fini(rdev);
2784 return r;
2785 }
2786 r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2787 if (r) {
2788 dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2789 cik_mec_fini(rdev);
2790 return r;
2791 }
2792
2793 /* clear memory. Not sure if this is required or not */
2794 memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2795
2796 radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2797 radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2798
2799 return 0;
2800 }
2801
2802 struct hqd_registers
2803 {
2804 u32 cp_mqd_base_addr;
2805 u32 cp_mqd_base_addr_hi;
2806 u32 cp_hqd_active;
2807 u32 cp_hqd_vmid;
2808 u32 cp_hqd_persistent_state;
2809 u32 cp_hqd_pipe_priority;
2810 u32 cp_hqd_queue_priority;
2811 u32 cp_hqd_quantum;
2812 u32 cp_hqd_pq_base;
2813 u32 cp_hqd_pq_base_hi;
2814 u32 cp_hqd_pq_rptr;
2815 u32 cp_hqd_pq_rptr_report_addr;
2816 u32 cp_hqd_pq_rptr_report_addr_hi;
2817 u32 cp_hqd_pq_wptr_poll_addr;
2818 u32 cp_hqd_pq_wptr_poll_addr_hi;
2819 u32 cp_hqd_pq_doorbell_control;
2820 u32 cp_hqd_pq_wptr;
2821 u32 cp_hqd_pq_control;
2822 u32 cp_hqd_ib_base_addr;
2823 u32 cp_hqd_ib_base_addr_hi;
2824 u32 cp_hqd_ib_rptr;
2825 u32 cp_hqd_ib_control;
2826 u32 cp_hqd_iq_timer;
2827 u32 cp_hqd_iq_rptr;
2828 u32 cp_hqd_dequeue_request;
2829 u32 cp_hqd_dma_offload;
2830 u32 cp_hqd_sema_cmd;
2831 u32 cp_hqd_msg_type;
2832 u32 cp_hqd_atomic0_preop_lo;
2833 u32 cp_hqd_atomic0_preop_hi;
2834 u32 cp_hqd_atomic1_preop_lo;
2835 u32 cp_hqd_atomic1_preop_hi;
2836 u32 cp_hqd_hq_scheduler0;
2837 u32 cp_hqd_hq_scheduler1;
2838 u32 cp_mqd_control;
2839 };
2840
2841 struct bonaire_mqd
2842 {
2843 u32 header;
2844 u32 dispatch_initiator;
2845 u32 dimensions[3];
2846 u32 start_idx[3];
2847 u32 num_threads[3];
2848 u32 pipeline_stat_enable;
2849 u32 perf_counter_enable;
2850 u32 pgm[2];
2851 u32 tba[2];
2852 u32 tma[2];
2853 u32 pgm_rsrc[2];
2854 u32 vmid;
2855 u32 resource_limits;
2856 u32 static_thread_mgmt01[2];
2857 u32 tmp_ring_size;
2858 u32 static_thread_mgmt23[2];
2859 u32 restart[3];
2860 u32 thread_trace_enable;
2861 u32 reserved1;
2862 u32 user_data[16];
2863 u32 vgtcs_invoke_count[2];
2864 struct hqd_registers queue_state;
2865 u32 dequeue_cntr;
2866 u32 interrupt_queue[64];
2867 };
2868
2869 /**
2870 * cik_cp_compute_resume - setup the compute queue registers
2871 *
2872 * @rdev: radeon_device pointer
2873 *
2874 * Program the compute queues and test them to make sure they
2875 * are working.
2876 * Returns 0 for success, error for failure.
2877 */
2878 static int cik_cp_compute_resume(struct radeon_device *rdev)
2879 {
2880 int r, i, idx;
2881 u32 tmp;
2882 bool use_doorbell = true;
2883 u64 hqd_gpu_addr;
2884 u64 mqd_gpu_addr;
2885 u64 eop_gpu_addr;
2886 u64 wb_gpu_addr;
2887 u32 *buf;
2888 struct bonaire_mqd *mqd;
2889
2890 r = cik_cp_compute_start(rdev);
2891 if (r)
2892 return r;
2893
2894 /* fix up chicken bits */
2895 tmp = RREG32(CP_CPF_DEBUG);
2896 tmp |= (1 << 23);
2897 WREG32(CP_CPF_DEBUG, tmp);
2898
2899 /* init the pipes */
2900 for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
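/* pipes 0-3 belong to MEC1 (me 1), pipes 4-7 to MEC2 (me 2) */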
2901 int me = (i < 4) ? 1 : 2;
2902 int pipe = (i < 4) ? i : (i - 4);
2903
2904 eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2905
2906 cik_srbm_select(rdev, me, pipe, 0, 0);
2907
2908 /* write the EOP addr */
2909 WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2910 WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2911
2912 /* set the VMID assigned */
2913 WREG32(CP_HPD_EOP_VMID, 0);
2914
2915 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
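/* e.g. MEC_HPD_SIZE = 2048 bytes: drm_order(2048 / 8) = 8, i.e. 2^(8+1) = 512 dwords = 2048 bytes */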
2916 tmp = RREG32(CP_HPD_EOP_CONTROL);
2917 tmp &= ~EOP_SIZE_MASK;
2918 tmp |= drm_order(MEC_HPD_SIZE / 8);
2919 WREG32(CP_HPD_EOP_CONTROL, tmp);
2920 }
2921 cik_srbm_select(rdev, 0, 0, 0, 0);
2922
2923 /* init the queues. Just two for now. */
2924 for (i = 0; i < 2; i++) {
2925 if (i == 0)
2926 idx = CAYMAN_RING_TYPE_CP1_INDEX;
2927 else
2928 idx = CAYMAN_RING_TYPE_CP2_INDEX;
2929
2930 if (rdev->ring[idx].mqd_obj == NULL) {
2931 r = radeon_bo_create(rdev,
2932 sizeof(struct bonaire_mqd),
2933 PAGE_SIZE, true,
2934 RADEON_GEM_DOMAIN_GTT, NULL,
2935 &rdev->ring[idx].mqd_obj);
2936 if (r) {
2937 dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2938 return r;
2939 }
2940 }
2941
2942 r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2943 if (unlikely(r != 0)) {
2944 cik_cp_compute_fini(rdev);
2945 return r;
2946 }
2947 r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2948 &mqd_gpu_addr);
2949 if (r) {
2950 dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2951 cik_cp_compute_fini(rdev);
2952 return r;
2953 }
2954 r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2955 if (r) {
2956 dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2957 cik_cp_compute_fini(rdev);
2958 return r;
2959 }
2960
2961 /* doorbell offset */
2962 rdev->ring[idx].doorbell_offset =
2963 (rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2964
2965 /* init the mqd struct */
2966 memset(buf, 0, sizeof(struct bonaire_mqd));
2967
2968 mqd = (struct bonaire_mqd *)buf;
2969 mqd->header = 0xC0310800;
2970 mqd->static_thread_mgmt01[0] = 0xffffffff;
2971 mqd->static_thread_mgmt01[1] = 0xffffffff;
2972 mqd->static_thread_mgmt23[0] = 0xffffffff;
2973 mqd->static_thread_mgmt23[1] = 0xffffffff;
2974
2975 cik_srbm_select(rdev, rdev->ring[idx].me,
2976 rdev->ring[idx].pipe,
2977 rdev->ring[idx].queue, 0);
2978
2979 /* disable wptr polling */
2980 tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2981 tmp &= ~WPTR_POLL_EN;
2982 WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2983
2984 /* enable doorbell? */
2985 mqd->queue_state.cp_hqd_pq_doorbell_control =
2986 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2987 if (use_doorbell)
2988 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
2989 else
2990 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
2991 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
2992 mqd->queue_state.cp_hqd_pq_doorbell_control);
2993
2994 /* disable the queue if it's active */
2995 mqd->queue_state.cp_hqd_dequeue_request = 0;
2996 mqd->queue_state.cp_hqd_pq_rptr = 0;
2997 mqd->queue_state.cp_hqd_pq_wptr = 0;
2998 if (RREG32(CP_HQD_ACTIVE) & 1) {
2999 WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3000 for (i = 0; i < rdev->usec_timeout; i++) {
3001 if (!(RREG32(CP_HQD_ACTIVE) & 1))
3002 break;
3003 udelay(1);
3004 }
3005 WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3006 WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3007 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3008 }
3009
3010 /* set the pointer to the MQD */
3011 mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3012 mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3013 WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3014 WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3015 /* set MQD vmid to 0 */
3016 mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3017 mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3018 WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3019
3020 /* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3021 hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3022 mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3023 mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3024 WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3025 WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3026
3027 /* set up the HQD, this is similar to CP_RB0_CNTL */
3028 mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3029 mqd->queue_state.cp_hqd_pq_control &=
3030 ~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3031
3032 mqd->queue_state.cp_hqd_pq_control |=
3033 drm_order(rdev->ring[idx].ring_size / 8);
3034 mqd->queue_state.cp_hqd_pq_control |=
3035 (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3036 #ifdef __BIG_ENDIAN
3037 mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3038 #endif
3039 mqd->queue_state.cp_hqd_pq_control &=
3040 ~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3041 mqd->queue_state.cp_hqd_pq_control |=
3042 PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3043 WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3044
3045 /* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3046 if (i == 0)
3047 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3048 else
3049 wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3050 mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3051 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3052 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3053 WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3054 mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3055
3056 /* set the wb address whether it's enabled or not */
3057 if (i == 0)
3058 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3059 else
3060 wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3061 mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3062 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3063 upper_32_bits(wb_gpu_addr) & 0xffff;
3064 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3065 mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3066 WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3067 mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3068
3069 /* enable the doorbell if requested */
3070 if (use_doorbell) {
3071 mqd->queue_state.cp_hqd_pq_doorbell_control =
3072 RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3073 mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3074 mqd->queue_state.cp_hqd_pq_doorbell_control |=
3075 DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3076 mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3077 mqd->queue_state.cp_hqd_pq_doorbell_control &=
3078 ~(DOORBELL_SOURCE | DOORBELL_HIT);
3079
3080 } else {
3081 mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3082 }
3083 WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3084 mqd->queue_state.cp_hqd_pq_doorbell_control);
3085
3086 /* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3087 rdev->ring[idx].wptr = 0;
3088 mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3089 WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3090 rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3091 mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3092
3093 /* set the vmid for the queue */
3094 mqd->queue_state.cp_hqd_vmid = 0;
3095 WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3096
3097 /* activate the queue */
3098 mqd->queue_state.cp_hqd_active = 1;
3099 WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3100
3101 cik_srbm_select(rdev, 0, 0, 0, 0);
3102
3103 radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3104 radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3105
3106 rdev->ring[idx].ready = true;
3107 r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3108 if (r)
3109 rdev->ring[idx].ready = false;
3110 }
3111
3112 return 0;
3113 }
3114
3115 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3116 {
3117 cik_cp_gfx_enable(rdev, enable);
3118 cik_cp_compute_enable(rdev, enable);
3119 }
3120
3121 static int cik_cp_load_microcode(struct radeon_device *rdev)
3122 {
3123 int r;
3124
3125 r = cik_cp_gfx_load_microcode(rdev);
3126 if (r)
3127 return r;
3128 r = cik_cp_compute_load_microcode(rdev);
3129 if (r)
3130 return r;
3131
3132 return 0;
3133 }
3134
3135 static void cik_cp_fini(struct radeon_device *rdev)
3136 {
3137 cik_cp_gfx_fini(rdev);
3138 cik_cp_compute_fini(rdev);
3139 }
3140
3141 static int cik_cp_resume(struct radeon_device *rdev)
3142 {
3143 int r;
3144
3145 /* Reset all cp blocks */
3146 WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3147 RREG32(GRBM_SOFT_RESET);
3148 mdelay(15);
3149 WREG32(GRBM_SOFT_RESET, 0);
3150 RREG32(GRBM_SOFT_RESET);
3151
3152 r = cik_cp_load_microcode(rdev);
3153 if (r)
3154 return r;
3155
3156 r = cik_cp_gfx_resume(rdev);
3157 if (r)
3158 return r;
3159 r = cik_cp_compute_resume(rdev);
3160 if (r)
3161 return r;
3162
3163 return 0;
3164 }
3165
3166 /*
3167 * sDMA - System DMA
3168 * Starting with CIK, the GPU has new asynchronous
3169 * DMA engines. These engines are used for compute
3170 * and gfx. There are two DMA engines (SDMA0, SDMA1)
3171 * and each one supports 1 ring buffer used for gfx
3172 * and 2 queues used for compute.
3173 *
3174 * The programming model is very similar to the CP
3175 * (ring buffer, IBs, etc.), but sDMA has its own
3176 * packet format that is different from the PM4 format
3177 * used by the CP. sDMA supports copying data, writing
3178 * embedded data, solid fills, and a number of other
3179 * things. It also has support for tiling/detiling of
3180 * buffers.
3181 */
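/*
 * Both engines share the same register layout; SDMA1's registers sit at
 * SDMA1_REGISTER_OFFSET relative to SDMA0's, which lets the code below
 * (e.g. cik_sdma_gfx_resume()) program both instances with one loop.
 */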
3182 /**
3183 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3184 *
3185 * @rdev: radeon_device pointer
3186 * @ib: IB object to schedule
3187 *
3188 * Schedule an IB in the DMA ring (CIK).
3189 */
3190 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3191 struct radeon_ib *ib)
3192 {
3193 struct radeon_ring *ring = &rdev->ring[ib->ring];
3194 u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3195
3196 if (rdev->wb.enabled) {
3197 u32 next_rptr = ring->wptr + 5;
3198 while ((next_rptr & 7) != 4)
3199 next_rptr++;
3200 next_rptr += 4;
3201 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3202 radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3203 radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3204 radeon_ring_write(ring, 1); /* number of DWs to follow */
3205 radeon_ring_write(ring, next_rptr);
3206 }
3207
3208 /* IB packet must end on an 8 DW boundary; pad to (wptr & 7) == 4 so the 4 dword IB packet below ends on one */
3209 while ((ring->wptr & 7) != 4)
3210 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3211 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3212 radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3213 radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3214 radeon_ring_write(ring, ib->length_dw);
3216 }
3217
3218 /**
3219 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3220 *
3221 * @rdev: radeon_device pointer
3222 * @fence: radeon fence object
3223 *
3224 * Add a DMA fence packet to the ring to write
3225 * the fence seq number and a DMA trap packet to generate
3226 * an interrupt if needed (CIK).
3227 */
3228 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3229 struct radeon_fence *fence)
3230 {
3231 struct radeon_ring *ring = &rdev->ring[fence->ring];
3232 u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3233 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3234 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3235 u32 ref_and_mask;
3236
3237 if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3238 ref_and_mask = SDMA0;
3239 else
3240 ref_and_mask = SDMA1;
3241
3242 /* write the fence */
3243 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3244 radeon_ring_write(ring, addr & 0xffffffff);
3245 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3246 radeon_ring_write(ring, fence->seq);
3247 /* generate an interrupt */
3248 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3249 /* flush HDP */
3250 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3251 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3252 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3253 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3254 radeon_ring_write(ring, ref_and_mask); /* MASK */
3255 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3256 }
3257
3258 /**
3259 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3260 *
3261 * @rdev: radeon_device pointer
3262 * @ring: radeon_ring structure holding ring information
3263 * @semaphore: radeon semaphore object
3264 * @emit_wait: wait or signal semaphore
3265 *
3266 * Add a DMA semaphore packet to the ring to wait on or signal
3267 * other rings (CIK).
3268 */
3269 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3270 struct radeon_ring *ring,
3271 struct radeon_semaphore *semaphore,
3272 bool emit_wait)
3273 {
3274 u64 addr = semaphore->gpu_addr;
3275 u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3276
3277 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3278 radeon_ring_write(ring, addr & 0xfffffff8);
3279 radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3280 }
3281
3282 /**
3283 * cik_sdma_gfx_stop - stop the gfx async dma engines
3284 *
3285 * @rdev: radeon_device pointer
3286 *
3287 * Stop the gfx async dma ring buffers (CIK).
3288 */
3289 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3290 {
3291 u32 rb_cntl, reg_offset;
3292 int i;
3293
3294 radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3295
3296 for (i = 0; i < 2; i++) {
3297 if (i == 0)
3298 reg_offset = SDMA0_REGISTER_OFFSET;
3299 else
3300 reg_offset = SDMA1_REGISTER_OFFSET;
3301 rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3302 rb_cntl &= ~SDMA_RB_ENABLE;
3303 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3304 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3305 }
3306 }
3307
3308 /**
3309 * cik_sdma_rlc_stop - stop the compute async dma engines
3310 *
3311 * @rdev: radeon_device pointer
3312 *
3313 * Stop the compute async dma queues (CIK).
3314 */
3315 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3316 {
3317 /* XXX todo */
3318 }
3319
3320 /**
3321 * cik_sdma_enable - enable/disable the async dma engines
3322 *
3323 * @rdev: radeon_device pointer
3324 * @enable: enable/disable the DMA MEs.
3325 *
3326 * Halt or unhalt the async dma engines (CIK).
3327 */
3328 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3329 {
3330 u32 me_cntl, reg_offset;
3331 int i;
3332
3333 for (i = 0; i < 2; i++) {
3334 if (i == 0)
3335 reg_offset = SDMA0_REGISTER_OFFSET;
3336 else
3337 reg_offset = SDMA1_REGISTER_OFFSET;
3338 me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3339 if (enable)
3340 me_cntl &= ~SDMA_HALT;
3341 else
3342 me_cntl |= SDMA_HALT;
3343 WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3344 }
3345 }
3346
3347 /**
3348 * cik_sdma_gfx_resume - setup and start the async dma engines
3349 *
3350 * @rdev: radeon_device pointer
3351 *
3352 * Set up the gfx DMA ring buffers and enable them (CIK).
3353 * Returns 0 for success, error for failure.
3354 */
3355 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3356 {
3357 struct radeon_ring *ring;
3358 u32 rb_cntl, ib_cntl;
3359 u32 rb_bufsz;
3360 u32 reg_offset, wb_offset;
3361 int i, r;
3362
3363 for (i = 0; i < 2; i++) {
3364 if (i == 0) {
3365 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3366 reg_offset = SDMA0_REGISTER_OFFSET;
3367 wb_offset = R600_WB_DMA_RPTR_OFFSET;
3368 } else {
3369 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3370 reg_offset = SDMA1_REGISTER_OFFSET;
3371 wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3372 }
3373
3374 WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3375 WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3376
3377 /* Set ring buffer size in dwords */
3378 rb_bufsz = drm_order(ring->ring_size / 4);
3379 rb_cntl = rb_bufsz << 1;
3380 #ifdef __BIG_ENDIAN
3381 rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3382 #endif
3383 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3384
3385 /* Initialize the ring buffer's read and write pointers */
3386 WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3387 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3388
3389 /* set the wb address whether it's enabled or not */
3390 WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3391 upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3392 WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3393 ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3394
3395 if (rdev->wb.enabled)
3396 rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3397
3398 WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3399 WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3400
3401 ring->wptr = 0;
3402 WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3403
3404 ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3405
3406 /* enable DMA RB */
3407 WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3408
3409 ib_cntl = SDMA_IB_ENABLE;
3410 #ifdef __BIG_ENDIAN
3411 ib_cntl |= SDMA_IB_SWAP_ENABLE;
3412 #endif
3413 /* enable DMA IBs */
3414 WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3415
3416 ring->ready = true;
3417
3418 r = radeon_ring_test(rdev, ring->idx, ring);
3419 if (r) {
3420 ring->ready = false;
3421 return r;
3422 }
3423 }
3424
3425 radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3426
3427 return 0;
3428 }
3429
3430 /**
3431 * cik_sdma_rlc_resume - setup and start the async dma engines
3432 *
3433 * @rdev: radeon_device pointer
3434 *
3435 * Set up the compute DMA queues and enable them (CIK).
3436 * Returns 0 for success, error for failure.
3437 */
3438 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3439 {
3440 /* XXX todo */
3441 return 0;
3442 }
3443
3444 /**
3445 * cik_sdma_load_microcode - load the sDMA ME ucode
3446 *
3447 * @rdev: radeon_device pointer
3448 *
3449 * Loads the sDMA0/1 ucode.
3450 * Returns 0 for success, -EINVAL if the ucode is not available.
3451 */
3452 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3453 {
3454 const __be32 *fw_data;
3455 int i;
3456
3457 if (!rdev->sdma_fw)
3458 return -EINVAL;
3459
3460 /* stop the gfx rings and rlc compute queues */
3461 cik_sdma_gfx_stop(rdev);
3462 cik_sdma_rlc_stop(rdev);
3463
3464 /* halt the MEs */
3465 cik_sdma_enable(rdev, false);
3466
3467 /* sdma0 */
3468 fw_data = (const __be32 *)rdev->sdma_fw->data;
3469 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3470 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3471 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3472 WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3473
3474 /* sdma1 */
3475 fw_data = (const __be32 *)rdev->sdma_fw->data;
3476 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3477 for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3478 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3479 WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3480
3481 WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3482 WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3483 return 0;
3484 }
3485
3486 /**
3487 * cik_sdma_resume - setup and start the async dma engines
3488 *
3489 * @rdev: radeon_device pointer
3490 *
3491 * Set up the DMA engines and enable them (CIK).
3492 * Returns 0 for success, error for failure.
3493 */
3494 static int cik_sdma_resume(struct radeon_device *rdev)
3495 {
3496 int r;
3497
3498 /* Reset dma */
3499 WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3500 RREG32(SRBM_SOFT_RESET);
3501 udelay(50);
3502 WREG32(SRBM_SOFT_RESET, 0);
3503 RREG32(SRBM_SOFT_RESET);
3504
3505 r = cik_sdma_load_microcode(rdev);
3506 if (r)
3507 return r;
3508
3509 /* unhalt the MEs */
3510 cik_sdma_enable(rdev, true);
3511
3512 /* start the gfx rings and rlc compute queues */
3513 r = cik_sdma_gfx_resume(rdev);
3514 if (r)
3515 return r;
3516 r = cik_sdma_rlc_resume(rdev);
3517 if (r)
3518 return r;
3519
3520 return 0;
3521 }
3522
3523 /**
3524 * cik_sdma_fini - tear down the async dma engines
3525 *
3526 * @rdev: radeon_device pointer
3527 *
3528 * Stop the async dma engines and free the rings (CIK).
3529 */
3530 static void cik_sdma_fini(struct radeon_device *rdev)
3531 {
3532 /* stop the gfx rings and rlc compute queues */
3533 cik_sdma_gfx_stop(rdev);
3534 cik_sdma_rlc_stop(rdev);
3535 /* halt the MEs */
3536 cik_sdma_enable(rdev, false);
3537 radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3538 radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3539 /* XXX - compute dma queue tear down */
3540 }
3541
3542 /**
3543 * cik_copy_dma - copy pages using the DMA engine
3544 *
3545 * @rdev: radeon_device pointer
3546 * @src_offset: src GPU address
3547 * @dst_offset: dst GPU address
3548 * @num_gpu_pages: number of GPU pages to xfer
3549 * @fence: radeon fence object
3550 *
3551 * Copy GPU pages using the DMA engine (CIK).
3552 * Used by the radeon ttm implementation to move pages if
3553 * registered as the asic copy callback.
3554 */
3555 int cik_copy_dma(struct radeon_device *rdev,
3556 uint64_t src_offset, uint64_t dst_offset,
3557 unsigned num_gpu_pages,
3558 struct radeon_fence **fence)
3559 {
3560 struct radeon_semaphore *sem = NULL;
3561 int ring_index = rdev->asic->copy.dma_ring_index;
3562 struct radeon_ring *ring = &rdev->ring[ring_index];
3563 u32 size_in_bytes, cur_size_in_bytes;
3564 int i, num_loops;
3565 int r = 0;
3566
3567 r = radeon_semaphore_create(rdev, &sem);
3568 if (r) {
3569 DRM_ERROR("radeon: moving bo (%d).\n", r);
3570 return r;
3571 }
3572
3573 size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3574 num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
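/* Ring-space sketch: each linear copy packet emitted below is 7 dwords,
 * and copies are chunked at 0x1fffff bytes; the extra 14 dwords requested
 * here leave room for the optional semaphore sync and the fence emitted at
 * the end (an illustrative breakdown, not a hardware requirement).
 */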
3575 r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3576 if (r) {
3577 DRM_ERROR("radeon: moving bo (%d).\n", r);
3578 radeon_semaphore_free(rdev, &sem, NULL);
3579 return r;
3580 }
3581
3582 if (radeon_fence_need_sync(*fence, ring->idx)) {
3583 radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3584 ring->idx);
3585 radeon_fence_note_sync(*fence, ring->idx);
3586 } else {
3587 radeon_semaphore_free(rdev, &sem, NULL);
3588 }
3589
3590 for (i = 0; i < num_loops; i++) {
3591 cur_size_in_bytes = size_in_bytes;
3592 if (cur_size_in_bytes > 0x1fffff)
3593 cur_size_in_bytes = 0x1fffff;
3594 size_in_bytes -= cur_size_in_bytes;
3595 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3596 radeon_ring_write(ring, cur_size_in_bytes);
3597 radeon_ring_write(ring, 0); /* src/dst endian swap */
3598 radeon_ring_write(ring, src_offset & 0xffffffff);
3599 radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3600 radeon_ring_write(ring, dst_offset & 0xfffffffc);
3601 radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3602 src_offset += cur_size_in_bytes;
3603 dst_offset += cur_size_in_bytes;
3604 }
3605
3606 r = radeon_fence_emit(rdev, fence, ring->idx);
3607 if (r) {
3608 radeon_ring_unlock_undo(rdev, ring);
3609 return r;
3610 }
3611
3612 radeon_ring_unlock_commit(rdev, ring);
3613 radeon_semaphore_free(rdev, &sem, *fence);
3614
3615 return r;
3616 }
3617
3618 /**
3619 * cik_sdma_ring_test - simple async dma engine test
3620 *
3621 * @rdev: radeon_device pointer
3622 * @ring: radeon_ring structure holding ring information
3623 *
3624 * Test the DMA engine by using it to write a
3625 * value to memory (CIK).
3626 * Returns 0 for success, error for failure.
3627 */
3628 int cik_sdma_ring_test(struct radeon_device *rdev,
3629 struct radeon_ring *ring)
3630 {
3631 unsigned i;
3632 int r;
3633 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3634 u32 tmp;
3635
3636 if (!ptr) {
3637 DRM_ERROR("invalid vram scratch pointer\n");
3638 return -EINVAL;
3639 }
3640
3641 tmp = 0xCAFEDEAD;
3642 writel(tmp, ptr);
3643
3644 r = radeon_ring_lock(rdev, ring, 4);
3645 if (r) {
3646 DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3647 return r;
3648 }
3649 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3650 radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3651 radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3652 radeon_ring_write(ring, 1); /* number of DWs to follow */
3653 radeon_ring_write(ring, 0xDEADBEEF);
3654 radeon_ring_unlock_commit(rdev, ring);
3655
3656 for (i = 0; i < rdev->usec_timeout; i++) {
3657 tmp = readl(ptr);
3658 if (tmp == 0xDEADBEEF)
3659 break;
3660 DRM_UDELAY(1);
3661 }
3662
3663 if (i < rdev->usec_timeout) {
3664 DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3665 } else {
3666 DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3667 ring->idx, tmp);
3668 r = -EINVAL;
3669 }
3670 return r;
3671 }
3672
3673 /**
3674 * cik_sdma_ib_test - test an IB on the DMA engine
3675 *
3676 * @rdev: radeon_device pointer
3677 * @ring: radeon_ring structure holding ring information
3678 *
3679 * Test a simple IB in the DMA ring (CIK).
3680 * Returns 0 on success, error on failure.
3681 */
3682 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3683 {
3684 struct radeon_ib ib;
3685 unsigned i;
3686 int r;
3687 void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3688 u32 tmp = 0;
3689
3690 if (!ptr) {
3691 DRM_ERROR("invalid vram scratch pointer\n");
3692 return -EINVAL;
3693 }
3694
3695 tmp = 0xCAFEDEAD;
3696 writel(tmp, ptr);
3697
3698 r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3699 if (r) {
3700 DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3701 return r;
3702 }
3703
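/* The IB below encodes a single SDMA linear write, mirroring the ring
 * test above: packet header, destination address lo/hi, dword count,
 * then the payload.
 */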
3704 ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3705 ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3706 ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3707 ib.ptr[3] = 1;
3708 ib.ptr[4] = 0xDEADBEEF;
3709 ib.length_dw = 5;
3710
3711 r = radeon_ib_schedule(rdev, &ib, NULL);
3712 if (r) {
3713 radeon_ib_free(rdev, &ib);
3714 DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3715 return r;
3716 }
3717 r = radeon_fence_wait(ib.fence, false);
3718 if (r) {
3719 DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3720 return r;
3721 }
3722 for (i = 0; i < rdev->usec_timeout; i++) {
3723 tmp = readl(ptr);
3724 if (tmp == 0xDEADBEEF)
3725 break;
3726 DRM_UDELAY(1);
3727 }
3728 if (i < rdev->usec_timeout) {
3729 DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3730 } else {
3731 DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3732 r = -EINVAL;
3733 }
3734 radeon_ib_free(rdev, &ib);
3735 return r;
3736 }
3737
3738
3739 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3740 {
3741 dev_info(rdev->dev, " GRBM_STATUS=0x%08X\n",
3742 RREG32(GRBM_STATUS));
3743 dev_info(rdev->dev, " GRBM_STATUS2=0x%08X\n",
3744 RREG32(GRBM_STATUS2));
3745 dev_info(rdev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3746 RREG32(GRBM_STATUS_SE0));
3747 dev_info(rdev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3748 RREG32(GRBM_STATUS_SE1));
3749 dev_info(rdev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3750 RREG32(GRBM_STATUS_SE2));
3751 dev_info(rdev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3752 RREG32(GRBM_STATUS_SE3));
3753 dev_info(rdev->dev, " SRBM_STATUS=0x%08X\n",
3754 RREG32(SRBM_STATUS));
3755 dev_info(rdev->dev, " SRBM_STATUS2=0x%08X\n",
3756 RREG32(SRBM_STATUS2));
3757 dev_info(rdev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
3758 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3759 dev_info(rdev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
3760 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3761 dev_info(rdev->dev, " CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3762 dev_info(rdev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3763 RREG32(CP_STALLED_STAT1));
3764 dev_info(rdev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3765 RREG32(CP_STALLED_STAT2));
3766 dev_info(rdev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3767 RREG32(CP_STALLED_STAT3));
3768 dev_info(rdev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3769 RREG32(CP_CPF_BUSY_STAT));
3770 dev_info(rdev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3771 RREG32(CP_CPF_STALLED_STAT1));
3772 dev_info(rdev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3773 dev_info(rdev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3774 dev_info(rdev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3775 RREG32(CP_CPC_STALLED_STAT1));
3776 dev_info(rdev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3777 }
3778
3779 /**
3780 * cik_gpu_check_soft_reset - check which blocks are busy
3781 *
3782 * @rdev: radeon_device pointer
3783 *
3784 * Check which blocks are busy and return the relevant reset
3785 * mask to be used by cik_gpu_soft_reset().
3786 * Returns a mask of the blocks to be reset.
3787 */
3788 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3789 {
3790 u32 reset_mask = 0;
3791 u32 tmp;
3792
3793 /* GRBM_STATUS */
3794 tmp = RREG32(GRBM_STATUS);
3795 if (tmp & (PA_BUSY | SC_BUSY |
3796 BCI_BUSY | SX_BUSY |
3797 TA_BUSY | VGT_BUSY |
3798 DB_BUSY | CB_BUSY |
3799 GDS_BUSY | SPI_BUSY |
3800 IA_BUSY | IA_BUSY_NO_DMA))
3801 reset_mask |= RADEON_RESET_GFX;
3802
3803 if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3804 reset_mask |= RADEON_RESET_CP;
3805
3806 /* GRBM_STATUS2 */
3807 tmp = RREG32(GRBM_STATUS2);
3808 if (tmp & RLC_BUSY)
3809 reset_mask |= RADEON_RESET_RLC;
3810
3811 /* SDMA0_STATUS_REG */
3812 tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3813 if (!(tmp & SDMA_IDLE))
3814 reset_mask |= RADEON_RESET_DMA;
3815
3816 /* SDMA1_STATUS_REG */
3817 tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3818 if (!(tmp & SDMA_IDLE))
3819 reset_mask |= RADEON_RESET_DMA1;
3820
3821 /* SRBM_STATUS2 */
3822 tmp = RREG32(SRBM_STATUS2);
3823 if (tmp & SDMA_BUSY)
3824 reset_mask |= RADEON_RESET_DMA;
3825
3826 if (tmp & SDMA1_BUSY)
3827 reset_mask |= RADEON_RESET_DMA1;
3828
3829 /* SRBM_STATUS */
3830 tmp = RREG32(SRBM_STATUS);
3831
3832 if (tmp & IH_BUSY)
3833 reset_mask |= RADEON_RESET_IH;
3834
3835 if (tmp & SEM_BUSY)
3836 reset_mask |= RADEON_RESET_SEM;
3837
3838 if (tmp & GRBM_RQ_PENDING)
3839 reset_mask |= RADEON_RESET_GRBM;
3840
3841 if (tmp & VMC_BUSY)
3842 reset_mask |= RADEON_RESET_VMC;
3843
3844 if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3845 MCC_BUSY | MCD_BUSY))
3846 reset_mask |= RADEON_RESET_MC;
3847
3848 if (evergreen_is_display_hung(rdev))
3849 reset_mask |= RADEON_RESET_DISPLAY;
3850
3851 /* Skip MC reset as it's most likely not hung, just busy */
3852 if (reset_mask & RADEON_RESET_MC) {
3853 DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3854 reset_mask &= ~RADEON_RESET_MC;
3855 }
3856
3857 return reset_mask;
3858 }
3859
3860 /**
3861 * cik_gpu_soft_reset - soft reset GPU
3862 *
3863 * @rdev: radeon_device pointer
3864 * @reset_mask: mask of which blocks to reset
3865 *
3866 * Soft reset the blocks specified in @reset_mask.
3867 */
3868 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3869 {
3870 struct evergreen_mc_save save;
3871 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3872 u32 tmp;
3873
3874 if (reset_mask == 0)
3875 return;
3876
3877 dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3878
3879 cik_print_gpu_status_regs(rdev);
3880 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
3881 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3882 dev_info(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3883 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3884
3885 /* stop the rlc */
3886 cik_rlc_stop(rdev);
3887
3888 /* Disable GFX parsing/prefetching */
3889 WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3890
3891 /* Disable MEC parsing/prefetching */
3892 WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3893
3894 if (reset_mask & RADEON_RESET_DMA) {
3895 /* sdma0 */
3896 tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3897 tmp |= SDMA_HALT;
3898 WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3899 }
3900 if (reset_mask & RADEON_RESET_DMA1) {
3901 /* sdma1 */
3902 tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3903 tmp |= SDMA_HALT;
3904 WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3905 }
3906
3907 evergreen_mc_stop(rdev, &save);
3908 if (evergreen_mc_wait_for_idle(rdev)) {
3909 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3910 }
3911
3912 if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3913 grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3914
3915 if (reset_mask & RADEON_RESET_CP) {
3916 grbm_soft_reset |= SOFT_RESET_CP;
3917
3918 srbm_soft_reset |= SOFT_RESET_GRBM;
3919 }
3920
3921 if (reset_mask & RADEON_RESET_DMA)
3922 srbm_soft_reset |= SOFT_RESET_SDMA;
3923
3924 if (reset_mask & RADEON_RESET_DMA1)
3925 srbm_soft_reset |= SOFT_RESET_SDMA1;
3926
3927 if (reset_mask & RADEON_RESET_DISPLAY)
3928 srbm_soft_reset |= SOFT_RESET_DC;
3929
3930 if (reset_mask & RADEON_RESET_RLC)
3931 grbm_soft_reset |= SOFT_RESET_RLC;
3932
3933 if (reset_mask & RADEON_RESET_SEM)
3934 srbm_soft_reset |= SOFT_RESET_SEM;
3935
3936 if (reset_mask & RADEON_RESET_IH)
3937 srbm_soft_reset |= SOFT_RESET_IH;
3938
3939 if (reset_mask & RADEON_RESET_GRBM)
3940 srbm_soft_reset |= SOFT_RESET_GRBM;
3941
3942 if (reset_mask & RADEON_RESET_VMC)
3943 srbm_soft_reset |= SOFT_RESET_VMC;
3944
3945 if (!(rdev->flags & RADEON_IS_IGP)) {
3946 if (reset_mask & RADEON_RESET_MC)
3947 srbm_soft_reset |= SOFT_RESET_MC;
3948 }
3949
3950 if (grbm_soft_reset) {
3951 tmp = RREG32(GRBM_SOFT_RESET);
3952 tmp |= grbm_soft_reset;
3953 dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3954 WREG32(GRBM_SOFT_RESET, tmp);
3955 tmp = RREG32(GRBM_SOFT_RESET);
3956
3957 udelay(50);
3958
3959 tmp &= ~grbm_soft_reset;
3960 WREG32(GRBM_SOFT_RESET, tmp);
3961 tmp = RREG32(GRBM_SOFT_RESET);
3962 }
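/* The pattern above (set the reset bits, read the register back to post
 * the write, wait ~50us, then clear the bits and read back again) is
 * mirrored for the SRBM block below.
 */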
3963
3964 if (srbm_soft_reset) {
3965 tmp = RREG32(SRBM_SOFT_RESET);
3966 tmp |= srbm_soft_reset;
3967 dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3968 WREG32(SRBM_SOFT_RESET, tmp);
3969 tmp = RREG32(SRBM_SOFT_RESET);
3970
3971 udelay(50);
3972
3973 tmp &= ~srbm_soft_reset;
3974 WREG32(SRBM_SOFT_RESET, tmp);
3975 tmp = RREG32(SRBM_SOFT_RESET);
3976 }
3977
3978 /* Wait a little for things to settle down */
3979 udelay(50);
3980
3981 evergreen_mc_resume(rdev, &save);
3982 udelay(50);
3983
3984 cik_print_gpu_status_regs(rdev);
3985 }
3986
3987 /**
3988 * cik_asic_reset - soft reset GPU
3989 *
3990 * @rdev: radeon_device pointer
3991 *
3992 * Look up which blocks are hung and attempt
3993 * to reset them.
3994 * Returns 0 for success.
3995 */
3996 int cik_asic_reset(struct radeon_device *rdev)
3997 {
3998 u32 reset_mask;
3999
4000 reset_mask = cik_gpu_check_soft_reset(rdev);
4001
4002 if (reset_mask)
4003 r600_set_bios_scratch_engine_hung(rdev, true);
4004
4005 cik_gpu_soft_reset(rdev, reset_mask);
4006
4007 reset_mask = cik_gpu_check_soft_reset(rdev);
4008
4009 if (!reset_mask)
4010 r600_set_bios_scratch_engine_hung(rdev, false);
4011
4012 return 0;
4013 }
4014
4015 /**
4016 * cik_gfx_is_lockup - check if the 3D engine is locked up
4017 *
4018 * @rdev: radeon_device pointer
4019 * @ring: radeon_ring structure holding ring information
4020 *
4021 * Check if the 3D engine is locked up (CIK).
4022 * Returns true if the engine is locked up, false if not.
4023 */
4024 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4025 {
4026 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4027
4028 if (!(reset_mask & (RADEON_RESET_GFX |
4029 RADEON_RESET_COMPUTE |
4030 RADEON_RESET_CP))) {
4031 radeon_ring_lockup_update(ring);
4032 return false;
4033 }
4034 /* force CP activities */
4035 radeon_ring_force_activity(rdev, ring);
4036 return radeon_ring_test_lockup(rdev, ring);
4037 }
4038
4039 /**
4040 * cik_sdma_is_lockup - Check if the DMA engine is locked up
4041 *
4042 * @rdev: radeon_device pointer
4043 * @ring: radeon_ring structure holding ring information
4044 *
4045 * Check if the async DMA engine is locked up (CIK).
4046 * Returns true if the engine appears to be locked up, false if not.
4047 */
4048 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4049 {
4050 u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4051 u32 mask;
4052
4053 if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4054 mask = RADEON_RESET_DMA;
4055 else
4056 mask = RADEON_RESET_DMA1;
4057
4058 if (!(reset_mask & mask)) {
4059 radeon_ring_lockup_update(ring);
4060 return false;
4061 }
4062 /* force ring activities */
4063 radeon_ring_force_activity(rdev, ring);
4064 return radeon_ring_test_lockup(rdev, ring);
4065 }
4066
4067 /* MC */
4068 /**
4069 * cik_mc_program - program the GPU memory controller
4070 *
4071 * @rdev: radeon_device pointer
4072 *
4073 * Set the location of vram, gart, and AGP in the GPU's
4074 * physical address space (CIK).
4075 */
4076 static void cik_mc_program(struct radeon_device *rdev)
4077 {
4078 struct evergreen_mc_save save;
4079 u32 tmp;
4080 int i, j;
4081
4082 /* Initialize HDP */
4083 for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4084 WREG32((0x2c14 + j), 0x00000000);
4085 WREG32((0x2c18 + j), 0x00000000);
4086 WREG32((0x2c1c + j), 0x00000000);
4087 WREG32((0x2c20 + j), 0x00000000);
4088 WREG32((0x2c24 + j), 0x00000000);
4089 }
4090 WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4091
4092 evergreen_mc_stop(rdev, &save);
4093 if (radeon_mc_wait_for_idle(rdev)) {
4094 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4095 }
4096 /* Lockout access through VGA aperture*/
4097 WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4098 /* Update configuration */
4099 WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4100 rdev->mc.vram_start >> 12);
4101 WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4102 rdev->mc.vram_end >> 12);
4103 WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4104 rdev->vram_scratch.gpu_addr >> 12);
4105 tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4106 tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4107 WREG32(MC_VM_FB_LOCATION, tmp);
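/* FB base/top are packed in 16MB units: e.g. vram_start = 0 and
 * vram_end = 0xFFFFFFFF would give tmp = 0x00FF0000 (top 0xFF in bits
 * 31:16, base 0x00 in bits 15:0). Illustrative values only.
 */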
4108 /* XXX double check these! */
4109 WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4110 WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4111 WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4112 WREG32(MC_VM_AGP_BASE, 0);
4113 WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4114 WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4115 if (radeon_mc_wait_for_idle(rdev)) {
4116 dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4117 }
4118 evergreen_mc_resume(rdev, &save);
4119 /* we need to own VRAM, so turn off the VGA renderer here
4120 * to stop it from overwriting our objects */
4121 rv515_vga_render_disable(rdev);
4122 }
4123
4124 /**
4125 * cik_mc_init - initialize the memory controller driver params
4126 *
4127 * @rdev: radeon_device pointer
4128 *
4129 * Look up the amount of vram, vram width, and decide how to place
4130 * vram and gart within the GPU's physical address space (CIK).
4131 * Returns 0 for success.
4132 */
4133 static int cik_mc_init(struct radeon_device *rdev)
4134 {
4135 u32 tmp;
4136 int chansize, numchan;
4137
4138 /* Get VRAM information */
4139 rdev->mc.vram_is_ddr = true;
4140 tmp = RREG32(MC_ARB_RAMCFG);
4141 if (tmp & CHANSIZE_MASK) {
4142 chansize = 64;
4143 } else {
4144 chansize = 32;
4145 }
4146 tmp = RREG32(MC_SHARED_CHMAP);
4147 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4148 case 0:
4149 default:
4150 numchan = 1;
4151 break;
4152 case 1:
4153 numchan = 2;
4154 break;
4155 case 2:
4156 numchan = 4;
4157 break;
4158 case 3:
4159 numchan = 8;
4160 break;
4161 case 4:
4162 numchan = 3;
4163 break;
4164 case 5:
4165 numchan = 6;
4166 break;
4167 case 6:
4168 numchan = 10;
4169 break;
4170 case 7:
4171 numchan = 12;
4172 break;
4173 case 8:
4174 numchan = 16;
4175 break;
4176 }
4177 rdev->mc.vram_width = numchan * chansize;
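/* e.g. a 4-channel configuration with 64-bit channels reports an
 * effective VRAM width of 256 bits.
 */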
4178 /* Could aper size report 0 ? */
4179 rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4180 rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4181 /* size in MB on CIK */
4182 rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4183 rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4184 rdev->mc.visible_vram_size = rdev->mc.aper_size;
4185 si_vram_gtt_location(rdev, &rdev->mc);
4186 radeon_update_bandwidth_info(rdev);
4187
4188 return 0;
4189 }
4190
4191 /*
4192 * GART
4193 * VMID 0 is the physical GPU addresses as used by the kernel.
4194 * VMIDs 1-15 are used for userspace clients and are handled
4195 * by the radeon vm/hsa code.
4196 */
4197 /**
4198 * cik_pcie_gart_tlb_flush - gart tlb flush callback
4199 *
4200 * @rdev: radeon_device pointer
4201 *
4202 * Flush the TLB for the VMID 0 page table (CIK).
4203 */
4204 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4205 {
4206 /* flush hdp cache */
4207 WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4208
4209 /* bits 0-15 are the VM contexts0-15 */
4210 WREG32(VM_INVALIDATE_REQUEST, 0x1);
4211 }
4212
4213 /**
4214 * cik_pcie_gart_enable - gart enable
4215 *
4216 * @rdev: radeon_device pointer
4217 *
4218 * This sets up the TLBs, programs the page tables for VMID0,
4219 * sets up the hw for VMIDs 1-15 which are allocated on
4220 * demand, and sets up the global locations for the LDS, GDS,
4221 * and GPUVM for FSA64 clients (CIK).
4222 * Returns 0 for success, errors for failure.
4223 */
4224 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4225 {
4226 int r, i;
4227
4228 if (rdev->gart.robj == NULL) {
4229 dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4230 return -EINVAL;
4231 }
4232 r = radeon_gart_table_vram_pin(rdev);
4233 if (r)
4234 return r;
4235 radeon_gart_restore(rdev);
4236 /* Setup TLB control */
4237 WREG32(MC_VM_MX_L1_TLB_CNTL,
4238 (0xA << 7) |
4239 ENABLE_L1_TLB |
4240 SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4241 ENABLE_ADVANCED_DRIVER_MODEL |
4242 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4243 /* Setup L2 cache */
4244 WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4245 ENABLE_L2_FRAGMENT_PROCESSING |
4246 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4247 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4248 EFFECTIVE_L2_QUEUE_SIZE(7) |
4249 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4250 WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4251 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4252 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4253 /* setup context0 */
4254 WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4255 WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4256 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4257 WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4258 (u32)(rdev->dummy_page.addr >> 12));
4259 WREG32(VM_CONTEXT0_CNTL2, 0);
4260 WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4261 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4262
4263 WREG32(0x15D4, 0);
4264 WREG32(0x15D8, 0);
4265 WREG32(0x15DC, 0);
4266
4267 /* empty context1-15 */
4268 /* FIXME start with 4G, once using 2 level pt switch to full
4269 * vm size space
4270 */
4271 /* set vm size, must be a multiple of 4 */
4272 WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4273 WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4274 for (i = 1; i < 16; i++) {
4275 if (i < 8)
4276 WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4277 rdev->gart.table_addr >> 12);
4278 else
4279 WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4280 rdev->gart.table_addr >> 12);
4281 }
4282
4283 /* enable context1-15 */
4284 WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4285 (u32)(rdev->dummy_page.addr >> 12));
4286 WREG32(VM_CONTEXT1_CNTL2, 4);
4287 WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4288 RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4289 RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4290 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4291 DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4292 PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4293 PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4294 VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4295 VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4296 READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4297 READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4298 WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4299 WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4300
4301 /* TC cache setup ??? */
4302 WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4303 WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4304 WREG32(TC_CFG_L1_STORE_POLICY, 0);
4305
4306 WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4307 WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4308 WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4309 WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4310 WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4311
4312 WREG32(TC_CFG_L1_VOLATILE, 0);
4313 WREG32(TC_CFG_L2_VOLATILE, 0);
4314
4315 if (rdev->family == CHIP_KAVERI) {
4316 u32 tmp = RREG32(CHUB_CONTROL);
4317 tmp &= ~BYPASS_VM;
4318 WREG32(CHUB_CONTROL, tmp);
4319 }
4320
4321 /* XXX SH_MEM regs */
4322 /* where to put LDS, scratch, GPUVM in FSA64 space */
4323 for (i = 0; i < 16; i++) {
4324 cik_srbm_select(rdev, 0, 0, 0, i);
4325 /* CP and shaders */
4326 WREG32(SH_MEM_CONFIG, 0);
4327 WREG32(SH_MEM_APE1_BASE, 1);
4328 WREG32(SH_MEM_APE1_LIMIT, 0);
4329 WREG32(SH_MEM_BASES, 0);
4330 /* SDMA GFX */
4331 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4332 WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4333 WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4334 WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4335 /* XXX SDMA RLC - todo */
4336 }
4337 cik_srbm_select(rdev, 0, 0, 0, 0);
4338
4339 cik_pcie_gart_tlb_flush(rdev);
4340 DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4341 (unsigned)(rdev->mc.gtt_size >> 20),
4342 (unsigned long long)rdev->gart.table_addr);
4343 rdev->gart.ready = true;
4344 return 0;
4345 }
4346
4347 /**
4348 * cik_pcie_gart_disable - gart disable
4349 *
4350 * @rdev: radeon_device pointer
4351 *
4352 * This disables all VM page tables (CIK).
4353 */
4354 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4355 {
4356 /* Disable all tables */
4357 WREG32(VM_CONTEXT0_CNTL, 0);
4358 WREG32(VM_CONTEXT1_CNTL, 0);
4359 /* Setup TLB control */
4360 WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4361 SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4362 /* Setup L2 cache */
4363 WREG32(VM_L2_CNTL,
4364 ENABLE_L2_FRAGMENT_PROCESSING |
4365 ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4366 ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4367 EFFECTIVE_L2_QUEUE_SIZE(7) |
4368 CONTEXT1_IDENTITY_ACCESS_MODE(1));
4369 WREG32(VM_L2_CNTL2, 0);
4370 WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4371 L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4372 radeon_gart_table_vram_unpin(rdev);
4373 }
4374
4375 /**
4376 * cik_pcie_gart_fini - vm fini callback
4377 *
4378 * @rdev: radeon_device pointer
4379 *
4380 * Tears down the driver GART/VM setup (CIK).
4381 */
4382 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4383 {
4384 cik_pcie_gart_disable(rdev);
4385 radeon_gart_table_vram_free(rdev);
4386 radeon_gart_fini(rdev);
4387 }
4388
4389 /* vm parser */
4390 /**
4391 * cik_ib_parse - vm ib_parse callback
4392 *
4393 * @rdev: radeon_device pointer
4394 * @ib: indirect buffer pointer
4395 *
4396 * CIK uses hw IB checking so this is a nop (CIK).
4397 */
4398 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4399 {
4400 return 0;
4401 }
4402
4403 /*
4404 * vm
4405 * VMID 0 is the physical GPU addresses as used by the kernel.
4406 * VMIDs 1-15 are used for userspace clients and are handled
4407 * by the radeon vm/hsa code.
4408 */
4409 /**
4410 * cik_vm_init - cik vm init callback
4411 *
4412 * @rdev: radeon_device pointer
4413 *
4414 * Inits cik specific vm parameters (number of VMs, base of vram for
4415 * VMIDs 1-15) (CIK).
4416 * Returns 0 for success.
4417 */
4418 int cik_vm_init(struct radeon_device *rdev)
4419 {
4420 /* number of VMs */
4421 rdev->vm_manager.nvm = 16;
4422 /* base offset of vram pages */
4423 if (rdev->flags & RADEON_IS_IGP) {
4424 u64 tmp = RREG32(MC_VM_FB_OFFSET);
4425 tmp <<= 22;
4426 rdev->vm_manager.vram_base_offset = tmp;
4427 } else
4428 rdev->vm_manager.vram_base_offset = 0;
4429
4430 return 0;
4431 }
4432
4433 /**
4434 * cik_vm_fini - cik vm fini callback
4435 *
4436 * @rdev: radeon_device pointer
4437 *
4438 * Tear down any asic specific VM setup (CIK).
4439 */
4440 void cik_vm_fini(struct radeon_device *rdev)
4441 {
4442 }
4443
4444 /**
4445 * cik_vm_decode_fault - print human readable fault info
4446 *
4447 * @rdev: radeon_device pointer
4448 * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4449 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
4450 *
4451 * Print human readable fault information (CIK).
4452 */
4453 static void cik_vm_decode_fault(struct radeon_device *rdev,
4454 u32 status, u32 addr, u32 mc_client)
4455 {
4456 u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4457 u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4458 u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4459 char *block = (char *)&mc_client;
4460
4461 printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4462 protections, vmid, addr,
4463 (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4464 block, mc_id);
4465 }
4466
4467 /**
4468 * cik_vm_flush - cik vm flush using the CP
4469 *
4470 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4471 *
4472 * Update the page table base and flush the VM TLB
4473 * using the CP (CIK).
4474 */
4475 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4476 {
4477 struct radeon_ring *ring = &rdev->ring[ridx];
4478
4479 if (vm == NULL)
4480 return;
4481
4482 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4483 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4484 WRITE_DATA_DST_SEL(0)));
4485 if (vm->id < 8) {
4486 radeon_ring_write(ring,
4487 (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4488 } else {
4489 radeon_ring_write(ring,
4490 (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4491 }
4492 radeon_ring_write(ring, 0);
4493 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4494
4495 /* update SH_MEM_* regs */
4496 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4497 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4498 WRITE_DATA_DST_SEL(0)));
4499 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4500 radeon_ring_write(ring, 0);
4501 radeon_ring_write(ring, VMID(vm->id));
4502
4503 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4504 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4505 WRITE_DATA_DST_SEL(0)));
4506 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4507 radeon_ring_write(ring, 0);
4508
4509 radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4510 radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4511 radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4512 radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4513
4514 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516 WRITE_DATA_DST_SEL(0)));
4517 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4518 radeon_ring_write(ring, 0);
4519 radeon_ring_write(ring, VMID(0));
4520
4521 /* HDP flush */
4522 /* We should be using the WAIT_REG_MEM packet here like in
4523 * cik_fence_ring_emit(), but it causes the CP to hang in this
4524 * context...
4525 */
4526 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4527 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4528 WRITE_DATA_DST_SEL(0)));
4529 radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4530 radeon_ring_write(ring, 0);
4531 radeon_ring_write(ring, 0);
4532
4533 /* bits 0-15 are the VM contexts0-15 */
4534 radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4535 radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4536 WRITE_DATA_DST_SEL(0)));
4537 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4538 radeon_ring_write(ring, 0);
4539 radeon_ring_write(ring, 1 << vm->id);
4540
4541 /* compute doesn't have PFP */
4542 if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4543 /* sync PFP to ME, otherwise we might get invalid PFP reads */
4544 radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4545 radeon_ring_write(ring, 0x0);
4546 }
4547 }
4548
4549 /**
4550 * cik_vm_set_page - update the page tables using sDMA
4551 *
4552 * @rdev: radeon_device pointer
4553 * @ib: indirect buffer to fill with commands
4554 * @pe: addr of the page entry
4555 * @addr: dst addr to write into pe
4556 * @count: number of page entries to update
4557 * @incr: increase next addr by incr bytes
4558 * @flags: access flags
4559 *
4560 * Update the page tables using CP or sDMA (CIK).
4561 */
4562 void cik_vm_set_page(struct radeon_device *rdev,
4563 struct radeon_ib *ib,
4564 uint64_t pe,
4565 uint64_t addr, unsigned count,
4566 uint32_t incr, uint32_t flags)
4567 {
4568 uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4569 uint64_t value;
4570 unsigned ndw;
4571
4572 if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4573 /* CP */
4574 while (count) {
4575 ndw = 2 + count * 2;
4576 if (ndw > 0x3FFE)
4577 ndw = 0x3FFE;
4578
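/* Each PTE is written as two dwords (low/high halves of the 64-bit
 * entry) after the two-dword destination address; the 0x3FFE cap above
 * presumably keeps the WRITE_DATA packet within the PM4 count field.
 */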
4579 ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4580 ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4581 WRITE_DATA_DST_SEL(1));
4582 ib->ptr[ib->length_dw++] = pe;
4583 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4584 for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4585 if (flags & RADEON_VM_PAGE_SYSTEM) {
4586 value = radeon_vm_map_gart(rdev, addr);
4587 value &= 0xFFFFFFFFFFFFF000ULL;
4588 } else if (flags & RADEON_VM_PAGE_VALID) {
4589 value = addr;
4590 } else {
4591 value = 0;
4592 }
4593 addr += incr;
4594 value |= r600_flags;
4595 ib->ptr[ib->length_dw++] = value;
4596 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4597 }
4598 }
4599 } else {
4600 /* DMA */
4601 if (flags & RADEON_VM_PAGE_SYSTEM) {
4602 while (count) {
4603 ndw = count * 2;
4604 if (ndw > 0xFFFFE)
4605 ndw = 0xFFFFE;
4606
4607 /* for non-physically contiguous pages (system) */
4608 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4609 ib->ptr[ib->length_dw++] = pe;
4610 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4611 ib->ptr[ib->length_dw++] = ndw;
4612 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4613 if (flags & RADEON_VM_PAGE_SYSTEM) {
4614 value = radeon_vm_map_gart(rdev, addr);
4615 value &= 0xFFFFFFFFFFFFF000ULL;
4616 } else if (flags & RADEON_VM_PAGE_VALID) {
4617 value = addr;
4618 } else {
4619 value = 0;
4620 }
4621 addr += incr;
4622 value |= r600_flags;
4623 ib->ptr[ib->length_dw++] = value;
4624 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4625 }
4626 }
4627 } else {
4628 while (count) {
4629 ndw = count;
4630 if (ndw > 0x7FFFF)
4631 ndw = 0x7FFFF;
4632
4633 if (flags & RADEON_VM_PAGE_VALID)
4634 value = addr;
4635 else
4636 value = 0;
4637 /* for physically contiguous pages (vram) */
4638 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4639 ib->ptr[ib->length_dw++] = pe; /* dst addr */
4640 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4641 ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4642 ib->ptr[ib->length_dw++] = 0;
4643 ib->ptr[ib->length_dw++] = value; /* value */
4644 ib->ptr[ib->length_dw++] = upper_32_bits(value);
4645 ib->ptr[ib->length_dw++] = incr; /* increment size */
4646 ib->ptr[ib->length_dw++] = 0;
4647 ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4648 pe += ndw * 8;
4649 addr += ndw * incr;
4650 count -= ndw;
4651 }
4652 }
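/* pad the IB to a multiple of 8 dwords with NOPs; the mask suggests an
 * SDMA alignment requirement */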
4653 while (ib->length_dw & 0x7)
4654 ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4655 }
4656 }
4657
4658 /**
4659 * cik_dma_vm_flush - cik vm flush using sDMA
4660 *
4661 * @rdev: radeon_device pointer
 * @ridx: radeon ring index
 * @vm: radeon_vm pointer
4662 *
4663 * Update the page table base and flush the VM TLB
4664 * using sDMA (CIK).
4665 */
4666 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4667 {
4668 struct radeon_ring *ring = &rdev->ring[ridx];
4669 u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4670 SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4671 u32 ref_and_mask;
4672
4673 if (vm == NULL)
4674 return;
4675
4676 if (ridx == R600_RING_TYPE_DMA_INDEX)
4677 ref_and_mask = SDMA0;
4678 else
4679 ref_and_mask = SDMA1;
4680
4681 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4682 if (vm->id < 8) {
4683 radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4684 } else {
4685 radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4686 }
4687 radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4688
4689 /* update SH_MEM_* regs */
4690 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4692 radeon_ring_write(ring, VMID(vm->id));
4693
4694 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 radeon_ring_write(ring, SH_MEM_BASES >> 2);
4696 radeon_ring_write(ring, 0);
4697
4698 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4700 radeon_ring_write(ring, 0);
4701
4702 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4703 radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4704 radeon_ring_write(ring, 1);
4705
4706 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4707 radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4708 radeon_ring_write(ring, 0);
4709
4710 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4711 radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4712 radeon_ring_write(ring, VMID(0));
4713
4714 /* flush HDP */
4715 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4716 radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4717 radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4718 radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4719 radeon_ring_write(ring, ref_and_mask); /* MASK */
4720 radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4721
4722 /* flush TLB */
4723 radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4724 radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4725 radeon_ring_write(ring, 1 << vm->id);
4726 }
4727
4728 /*
4729 * RLC
4730 * The RLC is a multi-purpose microengine that handles a
4731 * variety of functions, the most important of which is
4732 * the interrupt controller.
4733 */
4734 /**
4735 * cik_rlc_stop - stop the RLC ME
4736 *
4737 * @rdev: radeon_device pointer
4738 *
4739 * Halt the RLC ME (MicroEngine) (CIK).
4740 */
4741 static void cik_rlc_stop(struct radeon_device *rdev)
4742 {
4743 int i, j, k;
4744 u32 mask, tmp;
4745
4746 tmp = RREG32(CP_INT_CNTL_RING0);
4747 tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4748 WREG32(CP_INT_CNTL_RING0, tmp);
4749
4750 RREG32(CB_CGTT_SCLK_CTRL);
4751 RREG32(CB_CGTT_SCLK_CTRL);
4752 RREG32(CB_CGTT_SCLK_CTRL);
4753 RREG32(CB_CGTT_SCLK_CTRL);
4754
4755 tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4756 WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4757
4758 WREG32(RLC_CNTL, 0);
4759
4760 for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4761 for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4762 cik_select_se_sh(rdev, i, j);
4763 for (k = 0; k < rdev->usec_timeout; k++) {
4764 if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4765 break;
4766 udelay(1);
4767 }
4768 }
4769 }
4770 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4771
4772 mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4773 for (k = 0; k < rdev->usec_timeout; k++) {
4774 if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4775 break;
4776 udelay(1);
4777 }
4778 }
4779
4780 /**
4781 * cik_rlc_start - start the RLC ME
4782 *
4783 * @rdev: radeon_device pointer
4784 *
4785 * Unhalt the RLC ME (MicroEngine) (CIK).
4786 */
4787 static void cik_rlc_start(struct radeon_device *rdev)
4788 {
4789 u32 tmp;
4790
4791 WREG32(RLC_CNTL, RLC_ENABLE);
4792
4793 tmp = RREG32(CP_INT_CNTL_RING0);
4794 tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4795 WREG32(CP_INT_CNTL_RING0, tmp);
4796
4797 udelay(50);
4798 }
4799
4800 /**
4801 * cik_rlc_resume - setup the RLC hw
4802 *
4803 * @rdev: radeon_device pointer
4804 *
4805 * Initialize the RLC registers, load the ucode,
4806 * and start the RLC (CIK).
4807 * Returns 0 for success, -EINVAL if the ucode is not available.
4808 */
4809 static int cik_rlc_resume(struct radeon_device *rdev)
4810 {
4811 u32 i, size;
4812 u32 clear_state_info[3];
4813 const __be32 *fw_data;
4814
4815 if (!rdev->rlc_fw)
4816 return -EINVAL;
4817
4818 switch (rdev->family) {
4819 case CHIP_BONAIRE:
4820 default:
4821 size = BONAIRE_RLC_UCODE_SIZE;
4822 break;
4823 case CHIP_KAVERI:
4824 size = KV_RLC_UCODE_SIZE;
4825 break;
4826 case CHIP_KABINI:
4827 size = KB_RLC_UCODE_SIZE;
4828 break;
4829 }
4830
4831 cik_rlc_stop(rdev);
4832
4833 WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4834 RREG32(GRBM_SOFT_RESET);
4835 udelay(50);
4836 WREG32(GRBM_SOFT_RESET, 0);
4837 RREG32(GRBM_SOFT_RESET);
4838 udelay(50);
4839
4840 WREG32(RLC_LB_CNTR_INIT, 0);
4841 WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4842
4843 cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4844 WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4845 WREG32(RLC_LB_PARAMS, 0x00600408);
4846 WREG32(RLC_LB_CNTL, 0x80000004);
4847
4848 WREG32(RLC_MC_CNTL, 0);
4849 WREG32(RLC_UCODE_CNTL, 0);
4850
4851 fw_data = (const __be32 *)rdev->rlc_fw->data;
4852 WREG32(RLC_GPM_UCODE_ADDR, 0);
4853 for (i = 0; i < size; i++)
4854 WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4855 WREG32(RLC_GPM_UCODE_ADDR, 0);
4856
4857 /* XXX */
4858 clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4859 clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4860 clear_state_info[2] = 0;//cik_default_size;
4861 WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4862 for (i = 0; i < 3; i++)
4863 WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4864 WREG32(RLC_DRIVER_DMA_STATUS, 0);
4865
4866 cik_rlc_start(rdev);
4867
4868 return 0;
4869 }
4870
4871 /*
4872 * Interrupts
4873 * Starting with r6xx, interrupts are handled via a ring buffer.
4874 * Ring buffers are areas of GPU accessible memory that the GPU
4875 * writes interrupt vectors into and the host reads vectors out of.
4876 * There is a rptr (read pointer) that determines where the
4877 * host is currently reading, and a wptr (write pointer)
4878 * which determines where the GPU has written. When the
4879 * pointers are equal, the ring is idle. When the GPU
4880 * writes vectors to the ring buffer, it increments the
4881 * wptr. When there is an interrupt, the host then starts
4882 * fetching vectors and processing them until the pointers are
4883 * equal again at which point it updates the rptr.
4884 */
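/* A rough sketch of the host side of that protocol (simplified; the
 * actual handling is done by the IRQ process routine further down in
 * this file):
 *
 *	wptr = current write pointer (from writeback or IH_RB_WPTR);
 *	while (rptr != wptr) {
 *		decode the 128-bit vector at ring[rptr];
 *		rptr = (rptr + 16) % ring_size;
 *	}
 *	WREG32(IH_RB_RPTR, rptr);
 */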
4885
4886 /**
4887 * cik_enable_interrupts - Enable the interrupt ring buffer
4888 *
4889 * @rdev: radeon_device pointer
4890 *
4891 * Enable the interrupt ring buffer (CIK).
4892 */
4893 static void cik_enable_interrupts(struct radeon_device *rdev)
4894 {
4895 u32 ih_cntl = RREG32(IH_CNTL);
4896 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4897
4898 ih_cntl |= ENABLE_INTR;
4899 ih_rb_cntl |= IH_RB_ENABLE;
4900 WREG32(IH_CNTL, ih_cntl);
4901 WREG32(IH_RB_CNTL, ih_rb_cntl);
4902 rdev->ih.enabled = true;
4903 }
4904
4905 /**
4906 * cik_disable_interrupts - Disable the interrupt ring buffer
4907 *
4908 * @rdev: radeon_device pointer
4909 *
4910 * Disable the interrupt ring buffer (CIK).
4911 */
4912 static void cik_disable_interrupts(struct radeon_device *rdev)
4913 {
4914 u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4915 u32 ih_cntl = RREG32(IH_CNTL);
4916
4917 ih_rb_cntl &= ~IH_RB_ENABLE;
4918 ih_cntl &= ~ENABLE_INTR;
4919 WREG32(IH_RB_CNTL, ih_rb_cntl);
4920 WREG32(IH_CNTL, ih_cntl);
4921 /* set rptr, wptr to 0 */
4922 WREG32(IH_RB_RPTR, 0);
4923 WREG32(IH_RB_WPTR, 0);
4924 rdev->ih.enabled = false;
4925 rdev->ih.rptr = 0;
4926 }
4927
4928 /**
4929 * cik_disable_interrupt_state - Disable all interrupt sources
4930 *
4931 * @rdev: radeon_device pointer
4932 *
4933 * Clear all interrupt enable bits used by the driver (CIK).
4934 */
4935 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4936 {
4937 u32 tmp;
4938
4939 /* gfx ring */
4940 WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4941 /* sdma */
4942 tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4943 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4944 tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4945 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4946 /* compute queues */
4947 WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4948 WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4949 WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4950 WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4951 WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4952 WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4953 WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4954 WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4955 /* grbm */
4956 WREG32(GRBM_INT_CNTL, 0);
4957 /* vline/vblank, etc. */
4958 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4959 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4960 if (rdev->num_crtc >= 4) {
4961 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4962 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4963 }
4964 if (rdev->num_crtc >= 6) {
4965 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4966 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4967 }
4968
4969 /* dac hotplug */
4970 WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4971
4972 /* digital hotplug */
4973 tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4974 WREG32(DC_HPD1_INT_CONTROL, tmp);
4975 tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4976 WREG32(DC_HPD2_INT_CONTROL, tmp);
4977 tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4978 WREG32(DC_HPD3_INT_CONTROL, tmp);
4979 tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4980 WREG32(DC_HPD4_INT_CONTROL, tmp);
4981 tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4982 WREG32(DC_HPD5_INT_CONTROL, tmp);
4983 tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4984 WREG32(DC_HPD6_INT_CONTROL, tmp);
4985
4986 }
4987
4988 /**
4989 * cik_irq_init - init and enable the interrupt ring
4990 *
4991 * @rdev: radeon_device pointer
4992 *
4993 * Allocate a ring buffer for the interrupt controller,
4994 * enable the RLC, disable interrupts, enable the IH
4995 * ring buffer and enable it (CIK).
4996 * Called at device load and reume.
4997 * Returns 0 for success, errors for failure.
4998 */
4999 static int cik_irq_init(struct radeon_device *rdev)
5000 {
5001 int ret = 0;
5002 int rb_bufsz;
5003 u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5004
5005 /* allocate ring */
5006 ret = r600_ih_ring_alloc(rdev);
5007 if (ret)
5008 return ret;
5009
5010 /* disable irqs */
5011 cik_disable_interrupts(rdev);
5012
5013 /* init rlc */
5014 ret = cik_rlc_resume(rdev);
5015 if (ret) {
5016 r600_ih_ring_fini(rdev);
5017 return ret;
5018 }
5019
5020 /* setup interrupt control */
5021 /* XXX this should actually be a bus address, not an MC address. same on older asics */
5022 WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5023 interrupt_cntl = RREG32(INTERRUPT_CNTL);
5024 /* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5025 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5026 */
5027 interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5028 /* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5029 interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5030 WREG32(INTERRUPT_CNTL, interrupt_cntl);
5031
5032 WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5033 rb_bufsz = drm_order(rdev->ih.ring_size / 4);
5034
5035 ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5036 IH_WPTR_OVERFLOW_CLEAR |
5037 (rb_bufsz << 1));
5038
5039 if (rdev->wb.enabled)
5040 ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5041
5042 /* set the writeback address whether it's enabled or not */
5043 WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5044 WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5045
5046 WREG32(IH_RB_CNTL, ih_rb_cntl);
5047
5048 /* set rptr, wptr to 0 */
5049 WREG32(IH_RB_RPTR, 0);
5050 WREG32(IH_RB_WPTR, 0);
5051
5052 /* Default settings for IH_CNTL (disabled at first) */
5053 ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5054 /* RPTR_REARM only works if msi's are enabled */
5055 if (rdev->msi_enabled)
5056 ih_cntl |= RPTR_REARM;
5057 WREG32(IH_CNTL, ih_cntl);
5058
5059 /* force the active interrupt state to all disabled */
5060 cik_disable_interrupt_state(rdev);
5061
5062 pci_set_master(rdev->pdev);
5063
5064 /* enable irqs */
5065 cik_enable_interrupts(rdev);
5066
5067 return ret;
5068 }
5069
5070 /**
5071 * cik_irq_set - enable/disable interrupt sources
5072 *
5073 * @rdev: radeon_device pointer
5074 *
5075 * Enable interrupt sources on the GPU (vblanks, hpd,
5076 * etc.) (CIK).
5077 * Returns 0 for success, errors for failure.
5078 */
5079 int cik_irq_set(struct radeon_device *rdev)
5080 {
5081 u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5082 PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5083 u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5084 u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5085 u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5086 u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5087 u32 grbm_int_cntl = 0;
5088 u32 dma_cntl, dma_cntl1;
5089
5090 if (!rdev->irq.installed) {
5091 WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5092 return -EINVAL;
5093 }
5094 /* don't enable anything if the ih is disabled */
5095 if (!rdev->ih.enabled) {
5096 cik_disable_interrupts(rdev);
5097 /* force the active interrupt state to all disabled */
5098 cik_disable_interrupt_state(rdev);
5099 return 0;
5100 }
5101
5102 hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5103 hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5104 hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5105 hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5106 hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5107 hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5108
5109 dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5110 dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5111
5112 cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5113 cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5114 cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5115 cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5116 cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5117 cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5118 cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5119 cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5120
5121 /* enable CP interrupts on all rings */
5122 if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5123 DRM_DEBUG("cik_irq_set: sw int gfx\n");
5124 cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5125 }
5126 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5127 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5128 DRM_DEBUG("cik_irq_set: sw int cp1\n");
5129 if (ring->me == 1) {
5130 switch (ring->pipe) {
5131 case 0:
5132 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5133 break;
5134 case 1:
5135 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5136 break;
5137 case 2:
5138 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5139 break;
5140 case 3:
5141 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5142 break;
5143 default:
5144 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5145 break;
5146 }
5147 } else if (ring->me == 2) {
5148 switch (ring->pipe) {
5149 case 0:
5150 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5151 break;
5152 case 1:
5153 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5154 break;
5155 case 2:
5156 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5157 break;
5158 case 3:
5159 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5160 break;
5161 default:
5162 DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5163 break;
5164 }
5165 } else {
5166 DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5167 }
5168 }
5169 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5170 struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5171 DRM_DEBUG("cik_irq_set: sw int cp2\n");
5172 if (ring->me == 1) {
5173 switch (ring->pipe) {
5174 case 0:
5175 cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5176 break;
5177 case 1:
5178 cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5179 break;
5180 case 2:
5181 cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5182 break;
5183 case 3:
5184 cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5185 break;
5186 default:
5187 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5188 break;
5189 }
5190 } else if (ring->me == 2) {
5191 switch (ring->pipe) {
5192 case 0:
5193 cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5194 break;
5195 case 1:
5196 cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5197 break;
5198 case 2:
5199 cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5200 break;
5201 case 3:
5202 cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5203 break;
5204 default:
5205 DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5206 break;
5207 }
5208 } else {
5209 DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5210 }
5211 }
5212
5213 if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5214 DRM_DEBUG("cik_irq_set: sw int dma\n");
5215 dma_cntl |= TRAP_ENABLE;
5216 }
5217
5218 if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5219 DRM_DEBUG("cik_irq_set: sw int dma1\n");
5220 dma_cntl1 |= TRAP_ENABLE;
5221 }
5222
5223 if (rdev->irq.crtc_vblank_int[0] ||
5224 atomic_read(&rdev->irq.pflip[0])) {
5225 DRM_DEBUG("cik_irq_set: vblank 0\n");
5226 crtc1 |= VBLANK_INTERRUPT_MASK;
5227 }
5228 if (rdev->irq.crtc_vblank_int[1] ||
5229 atomic_read(&rdev->irq.pflip[1])) {
5230 DRM_DEBUG("cik_irq_set: vblank 1\n");
5231 crtc2 |= VBLANK_INTERRUPT_MASK;
5232 }
5233 if (rdev->irq.crtc_vblank_int[2] ||
5234 atomic_read(&rdev->irq.pflip[2])) {
5235 DRM_DEBUG("cik_irq_set: vblank 2\n");
5236 crtc3 |= VBLANK_INTERRUPT_MASK;
5237 }
5238 if (rdev->irq.crtc_vblank_int[3] ||
5239 atomic_read(&rdev->irq.pflip[3])) {
5240 DRM_DEBUG("cik_irq_set: vblank 3\n");
5241 crtc4 |= VBLANK_INTERRUPT_MASK;
5242 }
5243 if (rdev->irq.crtc_vblank_int[4] ||
5244 atomic_read(&rdev->irq.pflip[4])) {
5245 DRM_DEBUG("cik_irq_set: vblank 4\n");
5246 crtc5 |= VBLANK_INTERRUPT_MASK;
5247 }
5248 if (rdev->irq.crtc_vblank_int[5] ||
5249 atomic_read(&rdev->irq.pflip[5])) {
5250 DRM_DEBUG("cik_irq_set: vblank 5\n");
5251 crtc6 |= VBLANK_INTERRUPT_MASK;
5252 }
5253 if (rdev->irq.hpd[0]) {
5254 DRM_DEBUG("cik_irq_set: hpd 1\n");
5255 hpd1 |= DC_HPDx_INT_EN;
5256 }
5257 if (rdev->irq.hpd[1]) {
5258 DRM_DEBUG("cik_irq_set: hpd 2\n");
5259 hpd2 |= DC_HPDx_INT_EN;
5260 }
5261 if (rdev->irq.hpd[2]) {
5262 DRM_DEBUG("cik_irq_set: hpd 3\n");
5263 hpd3 |= DC_HPDx_INT_EN;
5264 }
5265 if (rdev->irq.hpd[3]) {
5266 DRM_DEBUG("cik_irq_set: hpd 4\n");
5267 hpd4 |= DC_HPDx_INT_EN;
5268 }
5269 if (rdev->irq.hpd[4]) {
5270 DRM_DEBUG("cik_irq_set: hpd 5\n");
5271 hpd5 |= DC_HPDx_INT_EN;
5272 }
5273 if (rdev->irq.hpd[5]) {
5274 DRM_DEBUG("cik_irq_set: hpd 6\n");
5275 hpd6 |= DC_HPDx_INT_EN;
5276 }
5277
5278 WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5279
5280 WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5281 WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5282
5283 WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5284 WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5285 WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5286 WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5287 WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5288 WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5289 WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5290 WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5291
5292 WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5293
5294 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5295 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5296 if (rdev->num_crtc >= 4) {
5297 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5298 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5299 }
5300 if (rdev->num_crtc >= 6) {
5301 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5302 WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5303 }
5304
5305 WREG32(DC_HPD1_INT_CONTROL, hpd1);
5306 WREG32(DC_HPD2_INT_CONTROL, hpd2);
5307 WREG32(DC_HPD3_INT_CONTROL, hpd3);
5308 WREG32(DC_HPD4_INT_CONTROL, hpd4);
5309 WREG32(DC_HPD5_INT_CONTROL, hpd5);
5310 WREG32(DC_HPD6_INT_CONTROL, hpd6);
5311
5312 return 0;
5313 }
5314
5315 /**
5316 * cik_irq_ack - ack interrupt sources
5317 *
5318 * @rdev: radeon_device pointer
5319 *
5320 * Ack interrupt sources on the GPU (vblanks, hpd,
5321 * etc.) (CIK). Certain interrupt sources are sw
5322 * generated and do not require an explicit ack.
5323 */
5324 static inline void cik_irq_ack(struct radeon_device *rdev)
5325 {
5326 u32 tmp;
5327
5328 rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5329 rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5330 rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5331 rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5332 rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5333 rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5334 rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5335
5336 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5337 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5338 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5339 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5340 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5341 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5342 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5343 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5344
5345 if (rdev->num_crtc >= 4) {
5346 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5347 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5348 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5349 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5350 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5351 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5352 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5353 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5354 }
5355
5356 if (rdev->num_crtc >= 6) {
5357 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5358 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5359 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5360 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5361 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5362 WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5363 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5364 WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5365 }
5366
5367 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5368 tmp = RREG32(DC_HPD1_INT_CONTROL);
5369 tmp |= DC_HPDx_INT_ACK;
5370 WREG32(DC_HPD1_INT_CONTROL, tmp);
5371 }
5372 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5373 tmp = RREG32(DC_HPD2_INT_CONTROL);
5374 tmp |= DC_HPDx_INT_ACK;
5375 WREG32(DC_HPD2_INT_CONTROL, tmp);
5376 }
5377 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5378 tmp = RREG32(DC_HPD3_INT_CONTROL);
5379 tmp |= DC_HPDx_INT_ACK;
5380 WREG32(DC_HPD3_INT_CONTROL, tmp);
5381 }
5382 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5383 tmp = RREG32(DC_HPD4_INT_CONTROL);
5384 tmp |= DC_HPDx_INT_ACK;
5385 WREG32(DC_HPD4_INT_CONTROL, tmp);
5386 }
5387 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5388 tmp = RREG32(DC_HPD5_INT_CONTROL);
5389 tmp |= DC_HPDx_INT_ACK;
5390 WREG32(DC_HPD5_INT_CONTROL, tmp);
5391 }
5392 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5393 tmp = RREG32(DC_HPD6_INT_CONTROL);
5394 tmp |= DC_HPDx_INT_ACK;
5395 WREG32(DC_HPD6_INT_CONTROL, tmp);
5396 }
5397 }
5398
5399 /**
5400 * cik_irq_disable - disable interrupts
5401 *
5402 * @rdev: radeon_device pointer
5403 *
5404 * Disable interrupts on the hw (CIK).
5405 */
5406 static void cik_irq_disable(struct radeon_device *rdev)
5407 {
5408 cik_disable_interrupts(rdev);
5409 /* Wait and acknowledge irq */
5410 mdelay(1);
5411 cik_irq_ack(rdev);
5412 cik_disable_interrupt_state(rdev);
5413 }
5414
5415 /**
5416 * cik_irq_suspend - disable interrupts for suspend
5417 *
5418 * @rdev: radeon_device pointer
5419 *
5420 * Disable interrupts and stop the RLC (CIK).
5421 * Used for suspend.
5422 */
5423 static void cik_irq_suspend(struct radeon_device *rdev)
5424 {
5425 cik_irq_disable(rdev);
5426 cik_rlc_stop(rdev);
5427 }
5428
5429 /**
5430 * cik_irq_fini - tear down interrupt support
5431 *
5432 * @rdev: radeon_device pointer
5433 *
5434 * Disable interrupts on the hw and free the IH ring
5435 * buffer (CIK).
5436 * Used for driver unload.
5437 */
5438 static void cik_irq_fini(struct radeon_device *rdev)
5439 {
5440 cik_irq_suspend(rdev);
5441 r600_ih_ring_fini(rdev);
5442 }
5443
5444 /**
5445 * cik_get_ih_wptr - get the IH ring buffer wptr
5446 *
5447 * @rdev: radeon_device pointer
5448 *
5449 * Get the IH ring buffer wptr from either the register
5450 * or the writeback memory buffer (CIK). Also check for
5451 * ring buffer overflow and deal with it.
5452 * Used by cik_irq_process().
5453 * Returns the value of the wptr.
5454 */
5455 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5456 {
5457 u32 wptr, tmp;
5458
5459 if (rdev->wb.enabled)
5460 wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5461 else
5462 wptr = RREG32(IH_RB_WPTR);
5463
5464 if (wptr & RB_OVERFLOW) {
5465 /* When a ring buffer overflow happens, start parsing interrupts
5466 * from the last vector that was not overwritten (wptr + 16).
5467 * Hopefully this should allow us to catch up.
5468 */
5469 dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5470 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5471 rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5472 tmp = RREG32(IH_RB_CNTL);
5473 tmp |= IH_WPTR_OVERFLOW_CLEAR;
5474 WREG32(IH_RB_CNTL, tmp);
5475 }
5476 return (wptr & rdev->ih.ptr_mask);
5477 }
5478
5479 /* CIK IV Ring
5480 * Each IV ring entry is 128 bits:
5481 * [7:0] - interrupt source id
5482 * [31:8] - reserved
5483 * [59:32] - interrupt source data
5484 * [63:60] - reserved
5485 * [71:64] - RINGID
5486 * CP:
5487 * ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5488 * QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5489 * - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5490 * ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5491 * PIPE_ID - ME0 0=3D
5492 * - ME1&2 compute dispatcher (4 pipes each)
5493 * SDMA:
5494 * INSTANCE_ID [1:0], QUEUE_ID[1:0]
5495 * INSTANCE_ID - 0 = sdma0, 1 = sdma1
5496 * QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5497 * [79:72] - VMID
5498 * [95:80] - PASID
5499 * [127:96] - reserved
5500 */
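/*
 * Editor's sketch, not part of the driver: one possible helper for pulling
 * the CP fields out of the RINGID byte described above.  cik_irq_process()
 * below open-codes the same masks; the helper name is hypothetical.  For
 * SDMA entries the layout is instead:
 *   instance_id = ring_id & 0x3;   queue_id = (ring_id & 0xc) >> 2;
 */
static inline void cik_iv_decode_cp_ringid(u32 ring_id, u8 *me_id,
					   u8 *pipe_id, u8 *queue_id)
{
	*me_id = (ring_id & 0x60) >> 5;   /* ME_ID[1:0],    bits [6:5] */
	*pipe_id = (ring_id & 0x18) >> 3; /* PIPE_ID[1:0],  bits [4:3] */
	*queue_id = ring_id & 0x7;        /* QUEUE_ID[2:0], bits [2:0] */
}
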
5501 /**
5502 * cik_irq_process - interrupt handler
5503 *
5504 * @rdev: radeon_device pointer
5505 *
5506 * Interrupt handler (CIK). Walk the IH ring,
5507 * ack interrupts and schedule work to handle
5508 * interrupt events.
5509 * Returns irq process return code.
5510 */
5511 int cik_irq_process(struct radeon_device *rdev)
5512 {
5513 struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5514 struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5515 u32 wptr;
5516 u32 rptr;
5517 u32 src_id, src_data, ring_id;
5518 u8 me_id, pipe_id, queue_id;
5519 u32 ring_index;
5520 bool queue_hotplug = false;
5521 bool queue_reset = false;
5522 u32 addr, status, mc_client;
5523
5524 if (!rdev->ih.enabled || rdev->shutdown)
5525 return IRQ_NONE;
5526
5527 wptr = cik_get_ih_wptr(rdev);
5528
5529 restart_ih:
5530 /* is somebody else already processing irqs? */
5531 if (atomic_xchg(&rdev->ih.lock, 1))
5532 return IRQ_NONE;
5533
5534 rptr = rdev->ih.rptr;
5535 DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5536
5537 /* Order reading of wptr vs. reading of IH ring data */
5538 rmb();
5539
5540 /* display interrupts */
5541 cik_irq_ack(rdev);
5542
5543 while (rptr != wptr) {
5544 /* wptr/rptr are in bytes! */
5545 ring_index = rptr / 4;
5546 src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5547 src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5548 ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5549
5550 switch (src_id) {
5551 case 1: /* D1 vblank/vline */
5552 switch (src_data) {
5553 case 0: /* D1 vblank */
5554 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5555 if (rdev->irq.crtc_vblank_int[0]) {
5556 drm_handle_vblank(rdev->ddev, 0);
5557 rdev->pm.vblank_sync = true;
5558 wake_up(&rdev->irq.vblank_queue);
5559 }
5560 if (atomic_read(&rdev->irq.pflip[0]))
5561 radeon_crtc_handle_flip(rdev, 0);
5562 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5563 DRM_DEBUG("IH: D1 vblank\n");
5564 }
5565 break;
5566 case 1: /* D1 vline */
5567 if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5568 rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5569 DRM_DEBUG("IH: D1 vline\n");
5570 }
5571 break;
5572 default:
5573 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5574 break;
5575 }
5576 break;
5577 case 2: /* D2 vblank/vline */
5578 switch (src_data) {
5579 case 0: /* D2 vblank */
5580 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5581 if (rdev->irq.crtc_vblank_int[1]) {
5582 drm_handle_vblank(rdev->ddev, 1);
5583 rdev->pm.vblank_sync = true;
5584 wake_up(&rdev->irq.vblank_queue);
5585 }
5586 if (atomic_read(&rdev->irq.pflip[1]))
5587 radeon_crtc_handle_flip(rdev, 1);
5588 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5589 DRM_DEBUG("IH: D2 vblank\n");
5590 }
5591 break;
5592 case 1: /* D2 vline */
5593 if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5594 rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5595 DRM_DEBUG("IH: D2 vline\n");
5596 }
5597 break;
5598 default:
5599 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5600 break;
5601 }
5602 break;
5603 case 3: /* D3 vblank/vline */
5604 switch (src_data) {
5605 case 0: /* D3 vblank */
5606 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5607 if (rdev->irq.crtc_vblank_int[2]) {
5608 drm_handle_vblank(rdev->ddev, 2);
5609 rdev->pm.vblank_sync = true;
5610 wake_up(&rdev->irq.vblank_queue);
5611 }
5612 if (atomic_read(&rdev->irq.pflip[2]))
5613 radeon_crtc_handle_flip(rdev, 2);
5614 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5615 DRM_DEBUG("IH: D3 vblank\n");
5616 }
5617 break;
5618 case 1: /* D3 vline */
5619 if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5620 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5621 DRM_DEBUG("IH: D3 vline\n");
5622 }
5623 break;
5624 default:
5625 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5626 break;
5627 }
5628 break;
5629 case 4: /* D4 vblank/vline */
5630 switch (src_data) {
5631 case 0: /* D4 vblank */
5632 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5633 if (rdev->irq.crtc_vblank_int[3]) {
5634 drm_handle_vblank(rdev->ddev, 3);
5635 rdev->pm.vblank_sync = true;
5636 wake_up(&rdev->irq.vblank_queue);
5637 }
5638 if (atomic_read(&rdev->irq.pflip[3]))
5639 radeon_crtc_handle_flip(rdev, 3);
5640 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5641 DRM_DEBUG("IH: D4 vblank\n");
5642 }
5643 break;
5644 case 1: /* D4 vline */
5645 if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5646 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5647 DRM_DEBUG("IH: D4 vline\n");
5648 }
5649 break;
5650 default:
5651 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5652 break;
5653 }
5654 break;
5655 case 5: /* D5 vblank/vline */
5656 switch (src_data) {
5657 case 0: /* D5 vblank */
5658 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5659 if (rdev->irq.crtc_vblank_int[4]) {
5660 drm_handle_vblank(rdev->ddev, 4);
5661 rdev->pm.vblank_sync = true;
5662 wake_up(&rdev->irq.vblank_queue);
5663 }
5664 if (atomic_read(&rdev->irq.pflip[4]))
5665 radeon_crtc_handle_flip(rdev, 4);
5666 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5667 DRM_DEBUG("IH: D5 vblank\n");
5668 }
5669 break;
5670 case 1: /* D5 vline */
5671 if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5672 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5673 DRM_DEBUG("IH: D5 vline\n");
5674 }
5675 break;
5676 default:
5677 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5678 break;
5679 }
5680 break;
5681 case 6: /* D6 vblank/vline */
5682 switch (src_data) {
5683 case 0: /* D6 vblank */
5684 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5685 if (rdev->irq.crtc_vblank_int[5]) {
5686 drm_handle_vblank(rdev->ddev, 5);
5687 rdev->pm.vblank_sync = true;
5688 wake_up(&rdev->irq.vblank_queue);
5689 }
5690 if (atomic_read(&rdev->irq.pflip[5]))
5691 radeon_crtc_handle_flip(rdev, 5);
5692 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5693 DRM_DEBUG("IH: D6 vblank\n");
5694 }
5695 break;
5696 case 1: /* D6 vline */
5697 if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5698 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5699 DRM_DEBUG("IH: D6 vline\n");
5700 }
5701 break;
5702 default:
5703 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5704 break;
5705 }
5706 break;
5707 case 42: /* HPD hotplug */
5708 switch (src_data) {
5709 case 0:
5710 if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5711 rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5712 queue_hotplug = true;
5713 DRM_DEBUG("IH: HPD1\n");
5714 }
5715 break;
5716 case 1:
5717 if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5718 rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5719 queue_hotplug = true;
5720 DRM_DEBUG("IH: HPD2\n");
5721 }
5722 break;
5723 case 2:
5724 if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5725 rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5726 queue_hotplug = true;
5727 DRM_DEBUG("IH: HPD3\n");
5728 }
5729 break;
5730 case 3:
5731 if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5732 rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5733 queue_hotplug = true;
5734 DRM_DEBUG("IH: HPD4\n");
5735 }
5736 break;
5737 case 4:
5738 if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5739 rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5740 queue_hotplug = true;
5741 DRM_DEBUG("IH: HPD5\n");
5742 }
5743 break;
5744 case 5:
5745 if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5746 rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5747 queue_hotplug = true;
5748 DRM_DEBUG("IH: HPD6\n");
5749 }
5750 break;
5751 default:
5752 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5753 break;
5754 }
5755 break;
5756 case 146:
5757 case 147:
5758 addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
5759 status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
5760 mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
5761 dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5762 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
5763 addr);
5764 dev_err(rdev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5765 status);
5766 cik_vm_decode_fault(rdev, status, addr, mc_client);
5767 /* reset addr and status */
5768 WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5769 break;
5770 case 176: /* GFX RB CP_INT */
5771 case 177: /* GFX IB CP_INT */
5772 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5773 break;
5774 case 181: /* CP EOP event */
5775 DRM_DEBUG("IH: CP EOP\n");
5776 /* XXX check the bitfield order! */
5777 me_id = (ring_id & 0x60) >> 5;
5778 pipe_id = (ring_id & 0x18) >> 3;
5779 queue_id = (ring_id & 0x7) >> 0;
5780 switch (me_id) {
5781 case 0:
5782 radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5783 break;
5784 case 1:
5785 case 2:
5786 if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5787 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5788 if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5789 radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5790 break;
5791 }
5792 break;
5793 case 184: /* CP Privileged reg access */
5794 DRM_ERROR("Illegal register access in command stream\n");
5795 /* XXX check the bitfield order! */
5796 me_id = (ring_id & 0x60) >> 5;
5797 pipe_id = (ring_id & 0x18) >> 3;
5798 queue_id = (ring_id & 0x7) >> 0;
5799 switch (me_id) {
5800 case 0:
5801 /* This results in a full GPU reset, but all we need to do is soft
5802 * reset the CP for gfx
5803 */
5804 queue_reset = true;
5805 break;
5806 case 1:
5807 /* XXX compute */
5808 queue_reset = true;
5809 break;
5810 case 2:
5811 /* XXX compute */
5812 queue_reset = true;
5813 break;
5814 }
5815 break;
5816 case 185: /* CP Privileged inst */
5817 DRM_ERROR("Illegal instruction in command stream\n");
5818 /* XXX check the bitfield order! */
5819 me_id = (ring_id & 0x60) >> 5;
5820 pipe_id = (ring_id & 0x18) >> 3;
5821 queue_id = (ring_id & 0x7) >> 0;
5822 switch (me_id) {
5823 case 0:
5824 /* This results in a full GPU reset, but all we need to do is soft
5825 * reset the CP for gfx
5826 */
5827 queue_reset = true;
5828 break;
5829 case 1:
5830 /* XXX compute */
5831 queue_reset = true;
5832 break;
5833 case 2:
5834 /* XXX compute */
5835 queue_reset = true;
5836 break;
5837 }
5838 break;
5839 case 224: /* SDMA trap event */
5840 /* XXX check the bitfield order! */
5841 me_id = (ring_id & 0x3) >> 0;
5842 queue_id = (ring_id & 0xc) >> 2;
5843 DRM_DEBUG("IH: SDMA trap\n");
5844 switch (me_id) {
5845 case 0:
5846 switch (queue_id) {
5847 case 0:
5848 radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5849 break;
5850 case 1:
5851 /* XXX compute */
5852 break;
5853 case 2:
5854 /* XXX compute */
5855 break;
5856 }
5857 break;
5858 case 1:
5859 switch (queue_id) {
5860 case 0:
5861 radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5862 break;
5863 case 1:
5864 /* XXX compute */
5865 break;
5866 case 2:
5867 /* XXX compute */
5868 break;
5869 }
5870 break;
5871 }
5872 break;
5873 case 241: /* SDMA Privileged inst */
5874 case 247: /* SDMA Privileged inst */
5875 DRM_ERROR("Illegal instruction in SDMA command stream\n");
5876 /* XXX check the bitfield order! */
5877 me_id = (ring_id & 0x3) >> 0;
5878 queue_id = (ring_id & 0xc) >> 2;
5879 switch (me_id) {
5880 case 0:
5881 switch (queue_id) {
5882 case 0:
5883 queue_reset = true;
5884 break;
5885 case 1:
5886 /* XXX compute */
5887 queue_reset = true;
5888 break;
5889 case 2:
5890 /* XXX compute */
5891 queue_reset = true;
5892 break;
5893 }
5894 break;
5895 case 1:
5896 switch (queue_id) {
5897 case 0:
5898 queue_reset = true;
5899 break;
5900 case 1:
5901 /* XXX compute */
5902 queue_reset = true;
5903 break;
5904 case 2:
5905 /* XXX compute */
5906 queue_reset = true;
5907 break;
5908 }
5909 break;
5910 }
5911 break;
5912 case 233: /* GUI IDLE */
5913 DRM_DEBUG("IH: GUI idle\n");
5914 break;
5915 default:
5916 DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5917 break;
5918 }
5919
5920 /* wptr/rptr are in bytes! */
5921 rptr += 16;
5922 rptr &= rdev->ih.ptr_mask;
5923 }
5924 if (queue_hotplug)
5925 schedule_work(&rdev->hotplug_work);
5926 if (queue_reset)
5927 schedule_work(&rdev->reset_work);
5928 rdev->ih.rptr = rptr;
5929 WREG32(IH_RB_RPTR, rdev->ih.rptr);
5930 atomic_set(&rdev->ih.lock, 0);
5931
5932 /* make sure wptr hasn't changed while processing */
5933 wptr = cik_get_ih_wptr(rdev);
5934 if (wptr != rptr)
5935 goto restart_ih;
5936
5937 return IRQ_HANDLED;
5938 }
5939
5940 /*
5941 * startup/shutdown callbacks
5942 */
5943 /**
5944 * cik_startup - program the asic to a functional state
5945 *
5946 * @rdev: radeon_device pointer
5947 *
5948 * Programs the asic to a functional state (CIK).
5949 * Called by cik_init() and cik_resume().
5950 * Returns 0 for success, error for failure.
5951 */
5952 static int cik_startup(struct radeon_device *rdev)
5953 {
5954 struct radeon_ring *ring;
5955 int r;
5956
5957 if (rdev->flags & RADEON_IS_IGP) {
5958 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5959 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5960 r = cik_init_microcode(rdev);
5961 if (r) {
5962 DRM_ERROR("Failed to load firmware!\n");
5963 return r;
5964 }
5965 }
5966 } else {
5967 if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5968 !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5969 !rdev->mc_fw) {
5970 r = cik_init_microcode(rdev);
5971 if (r) {
5972 DRM_ERROR("Failed to load firmware!\n");
5973 return r;
5974 }
5975 }
5976
5977 r = ci_mc_load_microcode(rdev);
5978 if (r) {
5979 DRM_ERROR("Failed to load MC firmware!\n");
5980 return r;
5981 }
5982 }
5983
5984 r = r600_vram_scratch_init(rdev);
5985 if (r)
5986 return r;
5987
5988 cik_mc_program(rdev);
5989 r = cik_pcie_gart_enable(rdev);
5990 if (r)
5991 return r;
5992 cik_gpu_init(rdev);
5993
5994 /* allocate rlc buffers */
5995 r = si_rlc_init(rdev);
5996 if (r) {
5997 DRM_ERROR("Failed to init rlc BOs!\n");
5998 return r;
5999 }
6000
6001 /* allocate wb buffer */
6002 r = radeon_wb_init(rdev);
6003 if (r)
6004 return r;
6005
6006 /* allocate mec buffers */
6007 r = cik_mec_init(rdev);
6008 if (r) {
6009 DRM_ERROR("Failed to init MEC BOs!\n");
6010 return r;
6011 }
6012
6013 r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6014 if (r) {
6015 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6016 return r;
6017 }
6018
6019 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6020 if (r) {
6021 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6022 return r;
6023 }
6024
6025 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6026 if (r) {
6027 dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6028 return r;
6029 }
6030
6031 r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6032 if (r) {
6033 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6034 return r;
6035 }
6036
6037 r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6038 if (r) {
6039 dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6040 return r;
6041 }
6042
6043 r = cik_uvd_resume(rdev);
6044 if (!r) {
6045 r = radeon_fence_driver_start_ring(rdev,
6046 R600_RING_TYPE_UVD_INDEX);
6047 if (r)
6048 dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6049 }
6050 if (r)
6051 rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6052
6053 /* Enable IRQ */
6054 if (!rdev->irq.installed) {
6055 r = radeon_irq_kms_init(rdev);
6056 if (r)
6057 return r;
6058 }
6059
6060 r = cik_irq_init(rdev);
6061 if (r) {
6062 DRM_ERROR("radeon: IH init failed (%d).\n", r);
6063 radeon_irq_kms_fini(rdev);
6064 return r;
6065 }
6066 cik_irq_set(rdev);
6067
6068 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6069 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6070 CP_RB0_RPTR, CP_RB0_WPTR,
6071 0, 0xfffff, RADEON_CP_PACKET2);
6072 if (r)
6073 return r;
6074
6075 /* set up the compute queues */
6076 /* type-2 packets are deprecated on MEC, use type-3 instead */
6077 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6078 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6079 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6080 0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6081 if (r)
6082 return r;
6083 ring->me = 1; /* first MEC */
6084 ring->pipe = 0; /* first pipe */
6085 ring->queue = 0; /* first queue */
6086 ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6087
6088 /* type-2 packets are deprecated on MEC, use type-3 instead */
6089 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6090 r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6091 CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6092 0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6093 if (r)
6094 return r;
6095 /* dGPU only have 1 MEC */
6096 ring->me = 1; /* first MEC */
6097 ring->pipe = 0; /* first pipe */
6098 ring->queue = 1; /* second queue */
6099 ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6100
6101 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6102 r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6103 SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6104 SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6105 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6106 if (r)
6107 return r;
6108
6109 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6110 r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6111 SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6112 SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6113 2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6114 if (r)
6115 return r;
6116
6117 r = cik_cp_resume(rdev);
6118 if (r)
6119 return r;
6120
6121 r = cik_sdma_resume(rdev);
6122 if (r)
6123 return r;
6124
6125 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6126 if (ring->ring_size) {
6127 r = radeon_ring_init(rdev, ring, ring->ring_size,
6128 R600_WB_UVD_RPTR_OFFSET,
6129 UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6130 0, 0xfffff, RADEON_CP_PACKET2);
6131 if (!r)
6132 r = r600_uvd_init(rdev);
6133 if (r)
6134 DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6135 }
6136
6137 r = radeon_ib_pool_init(rdev);
6138 if (r) {
6139 dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6140 return r;
6141 }
6142
6143 r = radeon_vm_manager_init(rdev);
6144 if (r) {
6145 dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6146 return r;
6147 }
6148
6149 return 0;
6150 }
6151
6152 /**
6153 * cik_resume - resume the asic to a functional state
6154 *
6155 * @rdev: radeon_device pointer
6156 *
6157 * Programs the asic to a functional state (CIK).
6158 * Called at resume.
6159 * Returns 0 for success, error for failure.
6160 */
6161 int cik_resume(struct radeon_device *rdev)
6162 {
6163 int r;
6164
6165 /* post card */
6166 atom_asic_init(rdev->mode_info.atom_context);
6167
6168 /* init golden registers */
6169 cik_init_golden_registers(rdev);
6170
6171 rdev->accel_working = true;
6172 r = cik_startup(rdev);
6173 if (r) {
6174 DRM_ERROR("cik startup failed on resume\n");
6175 rdev->accel_working = false;
6176 return r;
6177 }
6178
6179 return r;
6180
6181 }
6182
6183 /**
6184 * cik_suspend - suspend the asic
6185 *
6186 * @rdev: radeon_device pointer
6187 *
6188 * Bring the chip into a state suitable for suspend (CIK).
6189 * Called at suspend.
6190 * Returns 0 for success.
6191 */
6192 int cik_suspend(struct radeon_device *rdev)
6193 {
6194 radeon_vm_manager_fini(rdev);
6195 cik_cp_enable(rdev, false);
6196 cik_sdma_enable(rdev, false);
6197 r600_uvd_rbc_stop(rdev);
6198 radeon_uvd_suspend(rdev);
6199 cik_irq_suspend(rdev);
6200 radeon_wb_disable(rdev);
6201 cik_pcie_gart_disable(rdev);
6202 return 0;
6203 }
6204
6205 /* The plan is to move initialization into this function and use
6206 * helper functions so that radeon_device_init does little more
6207 * than call asic specific functions. This should also allow
6208 * us to remove a bunch of callback functions
6209 * like vram_info.
6210 */
6211 /**
6212 * cik_init - asic specific driver and hw init
6213 *
6214 * @rdev: radeon_device pointer
6215 *
6216 * Setup asic specific driver variables and program the hw
6217 * to a functional state (CIK).
6218 * Called at driver startup.
6219 * Returns 0 for success, errors for failure.
6220 */
6221 int cik_init(struct radeon_device *rdev)
6222 {
6223 struct radeon_ring *ring;
6224 int r;
6225
6226 /* Read BIOS */
6227 if (!radeon_get_bios(rdev)) {
6228 if (ASIC_IS_AVIVO(rdev))
6229 return -EINVAL;
6230 }
6231 /* Must be an ATOMBIOS */
6232 if (!rdev->is_atom_bios) {
6233 dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6234 return -EINVAL;
6235 }
6236 r = radeon_atombios_init(rdev);
6237 if (r)
6238 return r;
6239
6240 /* Post card if necessary */
6241 if (!radeon_card_posted(rdev)) {
6242 if (!rdev->bios) {
6243 dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6244 return -EINVAL;
6245 }
6246 DRM_INFO("GPU not posted. posting now...\n");
6247 atom_asic_init(rdev->mode_info.atom_context);
6248 }
6249 /* init golden registers */
6250 cik_init_golden_registers(rdev);
6251 /* Initialize scratch registers */
6252 cik_scratch_init(rdev);
6253 /* Initialize surface registers */
6254 radeon_surface_init(rdev);
6255 /* Initialize clocks */
6256 radeon_get_clock_info(rdev->ddev);
6257
6258 /* Fence driver */
6259 r = radeon_fence_driver_init(rdev);
6260 if (r)
6261 return r;
6262
6263 /* initialize memory controller */
6264 r = cik_mc_init(rdev);
6265 if (r)
6266 return r;
6267 /* Memory manager */
6268 r = radeon_bo_init(rdev);
6269 if (r)
6270 return r;
6271
6272 ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6273 ring->ring_obj = NULL;
6274 r600_ring_init(rdev, ring, 1024 * 1024);
6275
6276 ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6277 ring->ring_obj = NULL;
6278 r600_ring_init(rdev, ring, 1024 * 1024);
6279 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6280 if (r)
6281 return r;
6282
6283 ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6284 ring->ring_obj = NULL;
6285 r600_ring_init(rdev, ring, 1024 * 1024);
6286 r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6287 if (r)
6288 return r;
6289
6290 ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6291 ring->ring_obj = NULL;
6292 r600_ring_init(rdev, ring, 256 * 1024);
6293
6294 ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6295 ring->ring_obj = NULL;
6296 r600_ring_init(rdev, ring, 256 * 1024);
6297
6298 r = radeon_uvd_init(rdev);
6299 if (!r) {
6300 ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6301 ring->ring_obj = NULL;
6302 r600_ring_init(rdev, ring, 4096);
6303 }
6304
6305 rdev->ih.ring_obj = NULL;
6306 r600_ih_ring_init(rdev, 64 * 1024);
6307
6308 r = r600_pcie_gart_init(rdev);
6309 if (r)
6310 return r;
6311
6312 rdev->accel_working = true;
6313 r = cik_startup(rdev);
6314 if (r) {
6315 dev_err(rdev->dev, "disabling GPU acceleration\n");
6316 cik_cp_fini(rdev);
6317 cik_sdma_fini(rdev);
6318 cik_irq_fini(rdev);
6319 si_rlc_fini(rdev);
6320 cik_mec_fini(rdev);
6321 radeon_wb_fini(rdev);
6322 radeon_ib_pool_fini(rdev);
6323 radeon_vm_manager_fini(rdev);
6324 radeon_irq_kms_fini(rdev);
6325 cik_pcie_gart_fini(rdev);
6326 rdev->accel_working = false;
6327 }
6328
6329 /* Don't start up if the MC ucode is missing.
6330 * The default clocks and voltages before the MC ucode
6331 * is loaded are not sufficient for advanced operations.
6332 */
6333 if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6334 DRM_ERROR("radeon: MC ucode required for NI+.\n");
6335 return -EINVAL;
6336 }
6337
6338 return 0;
6339 }
6340
6341 /**
6342 * cik_fini - asic specific driver and hw fini
6343 *
6344 * @rdev: radeon_device pointer
6345 *
6346 * Tear down the asic specific driver variables and program the hw
6347 * to an idle state (CIK).
6348 * Called at driver unload.
6349 */
6350 void cik_fini(struct radeon_device *rdev)
6351 {
6352 cik_cp_fini(rdev);
6353 cik_sdma_fini(rdev);
6354 cik_irq_fini(rdev);
6355 si_rlc_fini(rdev);
6356 cik_mec_fini(rdev);
6357 radeon_wb_fini(rdev);
6358 radeon_vm_manager_fini(rdev);
6359 radeon_ib_pool_fini(rdev);
6360 radeon_irq_kms_fini(rdev);
6361 radeon_uvd_fini(rdev);
6362 cik_pcie_gart_fini(rdev);
6363 r600_vram_scratch_fini(rdev);
6364 radeon_gem_fini(rdev);
6365 radeon_fence_driver_fini(rdev);
6366 radeon_bo_fini(rdev);
6367 radeon_atombios_fini(rdev);
6368 kfree(rdev->bios);
6369 rdev->bios = NULL;
6370 }
6371
6372 /* display watermark setup */
6373 /**
6374 * dce8_line_buffer_adjust - Set up the line buffer
6375 *
6376 * @rdev: radeon_device pointer
6377 * @radeon_crtc: the selected display controller
6378 * @mode: the current display mode on the selected display
6379 * controller
6380 *
6381 * Setup up the line buffer allocation for
6382 * the selected display controller (CIK).
6383 * Returns the line buffer size in pixels.
6384 */
6385 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6386 struct radeon_crtc *radeon_crtc,
6387 struct drm_display_mode *mode)
6388 {
6389 u32 tmp;
6390
6391 /*
6392 * Line Buffer Setup
6393 * There are 6 line buffers, one for each display controller.
6394 * There are 3 partitions per LB. Select the number of partitions
6395 * to enable based on the display width. For display widths larger
6396 * than 4096, you need to use 2 display controllers and combine
6397 * them using the stereo blender.
6398 */
6399 if (radeon_crtc->base.enabled && mode) {
6400 if (mode->crtc_hdisplay < 1920)
6401 tmp = 1;
6402 else if (mode->crtc_hdisplay < 2560)
6403 tmp = 2;
6404 else if (mode->crtc_hdisplay < 4096)
6405 tmp = 0;
6406 else {
6407 DRM_DEBUG_KMS("Mode too big for LB!\n");
6408 tmp = 0;
6409 }
6410 } else
6411 tmp = 1;
6412
6413 WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6414 LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6415
6416 if (radeon_crtc->base.enabled && mode) {
6417 switch (tmp) {
6418 case 0:
6419 default:
6420 return 4096 * 2;
6421 case 1:
6422 return 1920 * 2;
6423 case 2:
6424 return 2560 * 2;
6425 }
6426 }
6427
6428 /* controller not enabled, so no lb used */
6429 return 0;
6430 }
6431
6432 /**
6433 * cik_get_number_of_dram_channels - get the number of dram channels
6434 *
6435 * @rdev: radeon_device pointer
6436 *
6437 * Look up the number of video ram channels (CIK).
6438 * Used for display watermark bandwidth calculations
6439 * Returns the number of dram channels
6440 */
6441 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6442 {
6443 u32 tmp = RREG32(MC_SHARED_CHMAP);
6444
6445 switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6446 case 0:
6447 default:
6448 return 1;
6449 case 1:
6450 return 2;
6451 case 2:
6452 return 4;
6453 case 3:
6454 return 8;
6455 case 4:
6456 return 3;
6457 case 5:
6458 return 6;
6459 case 6:
6460 return 10;
6461 case 7:
6462 return 12;
6463 case 8:
6464 return 16;
6465 }
6466 }
6467
6468 struct dce8_wm_params {
6469 u32 dram_channels; /* number of dram channels */
6470 u32 yclk; /* bandwidth per dram data pin in kHz */
6471 u32 sclk; /* engine clock in kHz */
6472 u32 disp_clk; /* display clock in kHz */
6473 u32 src_width; /* viewport width */
6474 u32 active_time; /* active display time in ns */
6475 u32 blank_time; /* blank time in ns */
6476 bool interlaced; /* mode is interlaced */
6477 fixed20_12 vsc; /* vertical scale ratio */
6478 u32 num_heads; /* number of active crtcs */
6479 u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6480 u32 lb_size; /* line buffer allocated to pipe */
6481 u32 vtaps; /* vertical scaler taps */
6482 };
6483
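/*
 * Editor's illustration, not driver code: rough dce8_wm_params values for a
 * single 1920x1080@60 head (CEA timing: 148.5 MHz pixel clock, htotal 2200),
 * using the same integer math as dce8_program_watermarks() below:
 *   disp_clk     = 148500 kHz
 *   src_width    = 1920
 *   pixel period = 1000000 / 148500 = 6 ns (truncated)
 *   active_time  = 1920 * 6 = 11520 ns
 *   blank_time   = (2200 - 1920) * 6 = 1680 ns
 *   num_heads = 1, bytes_per_pixel = 4, vtaps = 1, interlaced = false
 * yclk and sclk come from the current memory/engine clocks in kHz.
 */
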
6484 /**
6485 * dce8_dram_bandwidth - get the dram bandwidth
6486 *
6487 * @wm: watermark calculation data
6488 *
6489 * Calculate the raw dram bandwidth (CIK).
6490 * Used for display watermark bandwidth calculations
6491 * Returns the dram bandwidth in MBytes/s
6492 */
6493 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6494 {
6495 /* Calculate raw DRAM Bandwidth */
6496 fixed20_12 dram_efficiency; /* 0.7 */
6497 fixed20_12 yclk, dram_channels, bandwidth;
6498 fixed20_12 a;
6499
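	/* Effectively: bandwidth [MB/s] = (yclk [kHz] / 1000) * (dram_channels * 4 bytes) * 0.7 efficiency */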
6500 a.full = dfixed_const(1000);
6501 yclk.full = dfixed_const(wm->yclk);
6502 yclk.full = dfixed_div(yclk, a);
6503 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6504 a.full = dfixed_const(10);
6505 dram_efficiency.full = dfixed_const(7);
6506 dram_efficiency.full = dfixed_div(dram_efficiency, a);
6507 bandwidth.full = dfixed_mul(dram_channels, yclk);
6508 bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6509
6510 return dfixed_trunc(bandwidth);
6511 }
6512
6513 /**
6514 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6515 *
6516 * @wm: watermark calculation data
6517 *
6518 * Calculate the dram bandwidth used for display (CIK).
6519 * Used for display watermark bandwidth calculations
6520 * Returns the dram bandwidth for display in MBytes/s
6521 */
6522 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6523 {
6524 /* Calculate DRAM Bandwidth and the part allocated to display. */
6525 fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6526 fixed20_12 yclk, dram_channels, bandwidth;
6527 fixed20_12 a;
6528
6529 a.full = dfixed_const(1000);
6530 yclk.full = dfixed_const(wm->yclk);
6531 yclk.full = dfixed_div(yclk, a);
6532 dram_channels.full = dfixed_const(wm->dram_channels * 4);
6533 a.full = dfixed_const(10);
6534 disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6535 disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6536 bandwidth.full = dfixed_mul(dram_channels, yclk);
6537 bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6538
6539 return dfixed_trunc(bandwidth);
6540 }
6541
6542 /**
6543 * dce8_data_return_bandwidth - get the data return bandwidth
6544 *
6545 * @wm: watermark calculation data
6546 *
6547 * Calculate the data return bandwidth used for display (CIK).
6548 * Used for display watermark bandwidth calculations
6549 * Returns the data return bandwidth in MBytes/s
6550 */
6551 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6552 {
6553 /* Calculate the display Data return Bandwidth */
6554 fixed20_12 return_efficiency; /* 0.8 */
6555 fixed20_12 sclk, bandwidth;
6556 fixed20_12 a;
6557
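	/* Effectively: bandwidth [MB/s] = (sclk [kHz] / 1000) * 32 bytes * 0.8 return efficiency */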
6558 a.full = dfixed_const(1000);
6559 sclk.full = dfixed_const(wm->sclk);
6560 sclk.full = dfixed_div(sclk, a);
6561 a.full = dfixed_const(10);
6562 return_efficiency.full = dfixed_const(8);
6563 return_efficiency.full = dfixed_div(return_efficiency, a);
6564 a.full = dfixed_const(32);
6565 bandwidth.full = dfixed_mul(a, sclk);
6566 bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6567
6568 return dfixed_trunc(bandwidth);
6569 }
6570
6571 /**
6572 * dce8_dmif_request_bandwidth - get the dmif bandwidth
6573 *
6574 * @wm: watermark calculation data
6575 *
6576 * Calculate the dmif bandwidth used for display (CIK).
6577 * Used for display watermark bandwidth calculations
6578 * Returns the dmif bandwidth in MBytes/s
6579 */
6580 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6581 {
6582 /* Calculate the DMIF Request Bandwidth */
6583 fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6584 fixed20_12 disp_clk, bandwidth;
6585 fixed20_12 a, b;
6586
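	/* Effectively: bandwidth [MB/s] = (disp_clk [kHz] / 1000) * 32 bytes * 0.8 request efficiency */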
6587 a.full = dfixed_const(1000);
6588 disp_clk.full = dfixed_const(wm->disp_clk);
6589 disp_clk.full = dfixed_div(disp_clk, a);
6590 a.full = dfixed_const(32);
6591 b.full = dfixed_mul(a, disp_clk);
6592
6593 a.full = dfixed_const(10);
6594 disp_clk_request_efficiency.full = dfixed_const(8);
6595 disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6596
6597 bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6598
6599 return dfixed_trunc(bandwidth);
6600 }
6601
6602 /**
6603 * dce8_available_bandwidth - get the min available bandwidth
6604 *
6605 * @wm: watermark calculation data
6606 *
6607 * Calculate the min available bandwidth used for display (CIK).
6608 * Used for display watermark bandwidth calculations
6609 * Returns the min available bandwidth in MBytes/s
6610 */
6611 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6612 {
6613 /* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6614 u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6615 u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6616 u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6617
6618 return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6619 }
6620
6621 /**
6622 * dce8_average_bandwidth - get the average available bandwidth
6623 *
6624 * @wm: watermark calculation data
6625 *
6626 * Calculate the average available bandwidth used for display (CIK).
6627 * Used for display watermark bandwidth calculations
6628 * Returns the average available bandwidth in MBytes/s
6629 */
6630 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6631 {
6632 /* Calculate the display mode Average Bandwidth
6633 * DisplayMode should contain the source and destination dimensions,
6634 * timing, etc.
6635 */
6636 fixed20_12 bpp;
6637 fixed20_12 line_time;
6638 fixed20_12 src_width;
6639 fixed20_12 bandwidth;
6640 fixed20_12 a;
6641
6642 a.full = dfixed_const(1000);
6643 line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6644 line_time.full = dfixed_div(line_time, a);
6645 bpp.full = dfixed_const(wm->bytes_per_pixel);
6646 src_width.full = dfixed_const(wm->src_width);
6647 bandwidth.full = dfixed_mul(src_width, bpp);
6648 bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6649 bandwidth.full = dfixed_div(bandwidth, line_time);
6650
6651 return dfixed_trunc(bandwidth);
6652 }
6653
6654 /**
6655 * dce8_latency_watermark - get the latency watermark
6656 *
6657 * @wm: watermark calculation data
6658 *
6659 * Calculate the latency watermark (CIK).
6660 * Used for display watermark bandwidth calculations
6661 * Returns the latency watermark in ns
6662 */
6663 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6664 {
6665 /* First calculate the latency in ns */
6666 u32 mc_latency = 2000; /* 2000 ns. */
6667 u32 available_bandwidth = dce8_available_bandwidth(wm);
6668 u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6669 u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6670 u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6671 u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6672 (wm->num_heads * cursor_line_pair_return_time);
6673 u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6674 u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6675 u32 tmp, dmif_size = 12288;
6676 fixed20_12 a, b, c;
6677
6678 if (wm->num_heads == 0)
6679 return 0;
6680
6681 a.full = dfixed_const(2);
6682 b.full = dfixed_const(1);
6683 if ((wm->vsc.full > a.full) ||
6684 ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6685 (wm->vtaps >= 5) ||
6686 ((wm->vsc.full >= a.full) && wm->interlaced))
6687 max_src_lines_per_dst_line = 4;
6688 else
6689 max_src_lines_per_dst_line = 2;
6690
6691 a.full = dfixed_const(available_bandwidth);
6692 b.full = dfixed_const(wm->num_heads);
6693 a.full = dfixed_div(a, b);
6694
6695 b.full = dfixed_const(mc_latency + 512);
6696 c.full = dfixed_const(wm->disp_clk);
6697 b.full = dfixed_div(b, c);
6698
6699 c.full = dfixed_const(dmif_size);
6700 b.full = dfixed_div(c, b);
6701
6702 tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6703
6704 b.full = dfixed_const(1000);
6705 c.full = dfixed_const(wm->disp_clk);
6706 b.full = dfixed_div(c, b);
6707 c.full = dfixed_const(wm->bytes_per_pixel);
6708 b.full = dfixed_mul(b, c);
6709
6710 lb_fill_bw = min(tmp, dfixed_trunc(b));
6711
6712 a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6713 b.full = dfixed_const(1000);
6714 c.full = dfixed_const(lb_fill_bw);
6715 b.full = dfixed_div(c, b);
6716 a.full = dfixed_div(a, b);
6717 line_fill_time = dfixed_trunc(a);
6718
6719 if (line_fill_time < wm->active_time)
6720 return latency;
6721 else
6722 return latency + (line_fill_time - wm->active_time);
6723
6724 }
6725
6726 /**
6727 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6728 * average and available dram bandwidth
6729 *
6730 * @wm: watermark calculation data
6731 *
6732 * Check if the display average bandwidth fits in the display
6733 * dram bandwidth (CIK).
6734 * Used for display watermark bandwidth calculations
6735 * Returns true if the display fits, false if not.
6736 */
6737 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6738 {
6739 if (dce8_average_bandwidth(wm) <=
6740 (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6741 return true;
6742 else
6743 return false;
6744 }
6745
6746 /**
6747 * dce8_average_bandwidth_vs_available_bandwidth - check
6748 * average and available bandwidth
6749 *
6750 * @wm: watermark calculation data
6751 *
6752 * Check if the display average bandwidth fits in the display
6753 * available bandwidth (CIK).
6754 * Used for display watermark bandwidth calculations
6755 * Returns true if the display fits, false if not.
6756 */
6757 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6758 {
6759 if (dce8_average_bandwidth(wm) <=
6760 (dce8_available_bandwidth(wm) / wm->num_heads))
6761 return true;
6762 else
6763 return false;
6764 }
6765
6766 /**
6767 * dce8_check_latency_hiding - check latency hiding
6768 *
6769 * @wm: watermark calculation data
6770 *
6771 * Check latency hiding (CIK).
6772 * Used for display watermark bandwidth calculations
6773 * Returns true if the display fits, false if not.
6774 */
6775 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6776 {
6777 u32 lb_partitions = wm->lb_size / wm->src_width;
6778 u32 line_time = wm->active_time + wm->blank_time;
6779 u32 latency_tolerant_lines;
6780 u32 latency_hiding;
6781 fixed20_12 a;
6782
6783 a.full = dfixed_const(1);
6784 if (wm->vsc.full > a.full)
6785 latency_tolerant_lines = 1;
6786 else {
6787 if (lb_partitions <= (wm->vtaps + 1))
6788 latency_tolerant_lines = 1;
6789 else
6790 latency_tolerant_lines = 2;
6791 }
6792
6793 latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6794
6795 if (dce8_latency_watermark(wm) <= latency_hiding)
6796 return true;
6797 else
6798 return false;
6799 }
6800
6801 /**
6802 * dce8_program_watermarks - program display watermarks
6803 *
6804 * @rdev: radeon_device pointer
6805 * @radeon_crtc: the selected display controller
6806 * @lb_size: line buffer size
6807 * @num_heads: number of display controllers in use
6808 *
6809 * Calculate and program the display watermarks for the
6810 * selected display controller (CIK).
6811 */
6812 static void dce8_program_watermarks(struct radeon_device *rdev,
6813 struct radeon_crtc *radeon_crtc,
6814 u32 lb_size, u32 num_heads)
6815 {
6816 struct drm_display_mode *mode = &radeon_crtc->base.mode;
6817 struct dce8_wm_params wm;
6818 u32 pixel_period;
6819 u32 line_time = 0;
6820 u32 latency_watermark_a = 0, latency_watermark_b = 0;
6821 u32 tmp, wm_mask;
6822
6823 if (radeon_crtc->base.enabled && num_heads && mode) {
6824 pixel_period = 1000000 / (u32)mode->clock;
6825 line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6826
6827 wm.yclk = rdev->pm.current_mclk * 10;
6828 wm.sclk = rdev->pm.current_sclk * 10;
6829 wm.disp_clk = mode->clock;
6830 wm.src_width = mode->crtc_hdisplay;
6831 wm.active_time = mode->crtc_hdisplay * pixel_period;
6832 wm.blank_time = line_time - wm.active_time;
6833 wm.interlaced = false;
6834 if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6835 wm.interlaced = true;
6836 wm.vsc = radeon_crtc->vsc;
6837 wm.vtaps = 1;
6838 if (radeon_crtc->rmx_type != RMX_OFF)
6839 wm.vtaps = 2;
6840 wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6841 wm.lb_size = lb_size;
6842 wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6843 wm.num_heads = num_heads;
6844
6845 /* set for high clocks */
6846 latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6847 /* set for low clocks */
6848 /* wm.yclk = low clk; wm.sclk = low clk */
6849 latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6850
6851 /* possibly force display priority to high */
6852 /* should really do this at mode validation time... */
6853 if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6854 !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6855 !dce8_check_latency_hiding(&wm) ||
6856 (rdev->disp_priority == 2)) {
6857 DRM_DEBUG_KMS("force priority to high\n");
6858 }
6859 }
6860
6861 /* select wm A */
6862 wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6863 tmp = wm_mask;
6864 tmp &= ~LATENCY_WATERMARK_MASK(3);
6865 tmp |= LATENCY_WATERMARK_MASK(1);
6866 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6867 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6868 (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6869 LATENCY_HIGH_WATERMARK(line_time)));
6870 /* select wm B */
6871 tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6872 tmp &= ~LATENCY_WATERMARK_MASK(3);
6873 tmp |= LATENCY_WATERMARK_MASK(2);
6874 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6875 WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6876 (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6877 LATENCY_HIGH_WATERMARK(line_time)));
6878 /* restore original selection */
6879 WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6880 }
6881
6882 /**
6883 * dce8_bandwidth_update - program display watermarks
6884 *
6885 * @rdev: radeon_device pointer
6886 *
6887 * Calculate and program the display watermarks and line
6888 * buffer allocation (CIK).
6889 */
6890 void dce8_bandwidth_update(struct radeon_device *rdev)
6891 {
6892 struct drm_display_mode *mode = NULL;
6893 u32 num_heads = 0, lb_size;
6894 int i;
6895
6896 radeon_update_display_priority(rdev);
6897
6898 for (i = 0; i < rdev->num_crtc; i++) {
6899 if (rdev->mode_info.crtcs[i]->base.enabled)
6900 num_heads++;
6901 }
6902 for (i = 0; i < rdev->num_crtc; i++) {
6903 mode = &rdev->mode_info.crtcs[i]->base.mode;
6904 lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6905 dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6906 }
6907 }
6908
6909 /**
6910 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6911 *
6912 * @rdev: radeon_device pointer
6913 *
6914 * Fetches a GPU clock counter snapshot (CIK).
6915 * Returns the 64 bit clock counter snapshot.
6916 */
6917 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6918 {
6919 uint64_t clock;
6920
6921 mutex_lock(&rdev->gpu_clock_mutex);
6922 WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6923 clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6924 ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6925 mutex_unlock(&rdev->gpu_clock_mutex);
6926 return clock;
6927 }
6928
6929 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6930 u32 cntl_reg, u32 status_reg)
6931 {
6932 int r, i;
6933 struct atom_clock_dividers dividers;
6934 uint32_t tmp;
6935
6936 r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6937 clock, false, &dividers);
6938 if (r)
6939 return r;
6940
6941 tmp = RREG32_SMC(cntl_reg);
6942 tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6943 tmp |= dividers.post_divider;
6944 WREG32_SMC(cntl_reg, tmp);
6945
6946 for (i = 0; i < 100; i++) {
6947 if (RREG32_SMC(status_reg) & DCLK_STATUS)
6948 break;
6949 mdelay(10);
6950 }
6951 if (i == 100)
6952 return -ETIMEDOUT;
6953
6954 return 0;
6955 }
6956
6957 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6958 {
6959 int r = 0;
6960
6961 r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6962 if (r)
6963 return r;
6964
6965 r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6966 return r;
6967 }
6968
6969 int cik_uvd_resume(struct radeon_device *rdev)
6970 {
6971 uint64_t addr;
6972 uint32_t size;
6973 int r;
6974
6975 r = radeon_uvd_resume(rdev);
6976 if (r)
6977 return r;
6978
6979 /* program the VCPU memory controller bits 0-27 */
6980 addr = rdev->uvd.gpu_addr >> 3;
6981 size = RADEON_GPU_PAGE_ALIGN(rdev->uvd.fw_size + 4) >> 3;
6982 WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6983 WREG32(UVD_VCPU_CACHE_SIZE0, size);
6984
6985 addr += size;
6986 size = RADEON_UVD_STACK_SIZE >> 3;
6987 WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6988 WREG32(UVD_VCPU_CACHE_SIZE1, size);
6989
6990 addr += size;
6991 size = RADEON_UVD_HEAP_SIZE >> 3;
6992 WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6993 WREG32(UVD_VCPU_CACHE_SIZE2, size);
6994
6995 /* bits 28-31 */
6996 addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6997 WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6998
6999 /* bits 32-39 */
7000 addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
7001 WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
7002
7003 return 0;
7004 }