/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "radeon_asic.h"
#include "cik_blit_shaders.h"
/* GFX */
#define CIK_PFP_UCODE_SIZE 2144
#define CIK_ME_UCODE_SIZE 2144
#define CIK_CE_UCODE_SIZE 2144
/* MEC */
#define CIK_MEC_UCODE_SIZE 4192
/* RLC */
#define BONAIRE_RLC_UCODE_SIZE 2048
#define KB_RLC_UCODE_SIZE 2560
#define KV_RLC_UCODE_SIZE 2560
/* MC */
#define CIK_MC_UCODE_SIZE 7866
/* SDMA */
#define CIK_SDMA_UCODE_SIZE 1050
#define CIK_SDMA_UCODE_VERSION 64
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_fini(struct radeon_device *rdev);
extern int si_rlc_init(struct radeon_device *rdev);
static void cik_rlc_stop(struct radeon_device *rdev);
/*
 * Indirect registers accessor
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	u32 r;

	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	return r;
}
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
}
/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 *
 * Returns the value in the doorbell aperture at the
 * requested offset (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
{
	if (offset < rdev->doorbell.size) {
		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
		return 0;
	}
}
/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @offset: byte offset into the aperture
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested offset (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
{
	if (offset < rdev->doorbell.size) {
		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
	}
}
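
/*
 * Illustrative usage sketch (not part of the driver): a caller that has
 * reserved a doorbell page for a ring would pair the accessors above
 * roughly like this.  The doorbell_offset field name here is hypothetical.
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *	wptr = cik_mm_rdoorbell(rdev, ring->doorbell_offset);
 *
 * Out-of-range offsets only log a DRM_ERROR and read back as zero, so
 * callers are expected to stay within rdev->doorbell.size.
 */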
#define BONAIRE_IO_MC_REGS_SIZE 36

static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
/**
 * cik_srbm_select - select specific register instances
 *
 * @rdev: radeon_device pointer
 * @me: selected ME (micro engine)
 * @pipe: pipe to address
 * @queue: queue to address
 * @vmid: VMID to address
 *
 * Switches the currently active register instances.  Some
 * registers are instanced per VMID, others are instanced per
 * me/pipe/queue combination.
 */
static void cik_srbm_select(struct radeon_device *rdev,
			    u32 me, u32 pipe, u32 queue, u32 vmid)
{
	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
			     MEID(me & 0x3) |
			     VMID(vmid & 0xf) |
			     QUEUEID(queue & 0x7));
	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
}
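
/*
 * Usage sketch (illustrative only): code that programs the registers of
 * one compute queue would select that me/pipe/queue instance, perform
 * its register writes, then restore the default instance.  In practice
 * such a sequence must be serialized against other users of
 * SRBM_GFX_CNTL; the locking is not shown here.
 *
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... WREG32()s that hit the per-queue instanced registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 */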
/**
 * ci_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (CIK).
 * Returns 0 on success, error on failure.
 */
static int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	u32 running, blackout = 0;
	u32 *io_mc_regs;
	int i, ucode_size, regs_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
		ucode_size = CIK_MC_UCODE_SIZE;
		regs_size = BONAIRE_IO_MC_REGS_SIZE;
		break;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running) {
		blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
		WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
	}

	/* reset the engine and set to writable */
	WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
	WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

	/* load mc io regs */
	for (i = 0; i < regs_size; i++) {
		WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
		WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
	}
	/* load the MC ucode */
	fw_data = (const __be32 *)rdev->mc_fw->data;
	for (i = 0; i < ucode_size; i++)
		WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));

	/* put the engine back into the active state */
	WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
	WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
	WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

	/* wait for training to complete */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
			break;
		udelay(1);
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
			break;
		udelay(1);
	}

	if (running)
		WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);

	return 0;
}
/**
 * cik_init_microcode - load ucode images from disk
 *
 * @rdev: radeon_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int cik_init_microcode(struct radeon_device *rdev)
{
	struct platform_device *pdev;
	const char *chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size,
		mec_req_size, rlc_req_size, mc_req_size,
		sdma_req_size;
	char fw_name[30];
	int err;

	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
	err = IS_ERR(pdev);
	if (err) {
		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
		return -EINVAL;
	}

	switch (rdev->family) {
	case CHIP_BONAIRE:
		chip_name = "BONAIRE";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
		mc_req_size = CIK_MC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KAVERI:
		chip_name = "KAVERI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	case CHIP_KABINI:
		chip_name = "KABINI";
		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
		me_req_size = CIK_ME_UCODE_SIZE * 4;
		ce_req_size = CIK_CE_UCODE_SIZE * 4;
		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
		break;
	default:
		BUG();
	}

	DRM_INFO("Loading %s Microcode\n", chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->pfp_fw->size != pfp_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->pfp_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->me_fw->size != me_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->me_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->ce_fw->size != ce_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->ce_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->mec_fw->size != mec_req_size) {
		printk(KERN_ERR
		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
		       rdev->mec_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->rlc_fw->size != rlc_req_size) {
		printk(KERN_ERR
		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
		       rdev->rlc_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
	err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
	if (err)
		goto out;
	if (rdev->sdma_fw->size != sdma_req_size) {
		printk(KERN_ERR
		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
		       rdev->sdma_fw->size, fw_name);
		err = -EINVAL;
		goto out;
	}

	/* No MC ucode on APUs */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
		if (err)
			goto out;
		if (rdev->mc_fw->size != mc_req_size) {
			printk(KERN_ERR
			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
			       rdev->mc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	}

out:
	platform_device_unregister(pdev);

	if (err) {
		printk(KERN_ERR
		       "cik_cp: Failed to load firmware \"%s\"\n",
		       fw_name);
		release_firmware(rdev->pfp_fw);
		rdev->pfp_fw = NULL;
		release_firmware(rdev->me_fw);
		rdev->me_fw = NULL;
		release_firmware(rdev->ce_fw);
		rdev->ce_fw = NULL;
		release_firmware(rdev->rlc_fw);
		rdev->rlc_fw = NULL;
		release_firmware(rdev->mc_fw);
		rdev->mc_fw = NULL;
	}
	return err;
}
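
/*
 * Note (illustrative, not code): request_firmware() resolves the names
 * built above against the system firmware search path, so for Bonaire
 * the PFP image is typically loaded from something like
 *
 *	/lib/firmware/radeon/BONAIRE_pfp.bin
 *
 * The MODULE_FIRMWARE() tags near the top of this file advertise the
 * same names so initramfs tooling can bundle the images with the module.
 */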
/**
 * cik_tiling_mode_table_init - init the hw tiling table
 *
 * @rdev: radeon_device pointer
 *
 * Starting with SI, the tiling setup is done globally in a
 * set of 32 tiling modes.  Rather than selecting each set of
 * parameters per surface as on older asics, we just select
 * which index in the tiling table we want to use, and the
 * surface uses those parameters (CIK).
 */
static void cik_tiling_mode_table_init(struct radeon_device *rdev)
{
	const u32 num_tile_mode_states = 32;
	const u32 num_secondary_tile_mode_states = 16;
	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
	u32 num_pipe_configs;
	u32 num_rbs = rdev->config.cik.max_backends_per_se *
		rdev->config.cik.max_shader_engines;

	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
		break;
	case 2:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
		break;
	case 4:
		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
		break;
	}

	num_pipe_configs = rdev->config.cik.max_tile_pipes;
	if (num_pipe_configs > 8)
		num_pipe_configs = 8; /* ??? */
	if (num_pipe_configs == 8) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_2_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 4) {
		if (num_rbs == 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		} else if (num_rbs < 4) {
			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
				switch (reg_offset) {
				case 0:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
					break;
				case 1:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
					break;
				case 2:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 3:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
					break;
				case 4:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 5:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
					break;
				case 6:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
					break;
				case 7:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 TILE_SPLIT(split_equal_to_row_size));
					break;
				case 8:
					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
					break;
				case 9:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
					break;
				case 10:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 11:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 12:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 13:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
					break;
				case 14:
					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 16:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 17:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 27:
					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
					break;
				case 28:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 29:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				case 30:
					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
					break;
				default:
					gb_tile_moden = 0;
					break;
				}
				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
			}
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
						 NUM_BANKS(ADDR_SURF_4_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else if (num_pipe_configs == 2) {
		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
				break;
			case 1:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
				break;
			case 2:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 3:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
				break;
			case 4:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 5:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
				break;
			case 6:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
				break;
			case 7:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 TILE_SPLIT(split_equal_to_row_size));
				break;
			case 8:
				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
				break;
			case 9:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
				break;
			case 10:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 11:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 12:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 13:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
				break;
			case 14:
				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 16:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 17:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 27:
				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
				break;
			case 28:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 29:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			case 30:
				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
						 PIPE_CONFIG(ADDR_SURF_P2) |
						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
			switch (reg_offset) {
			case 0:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 1:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 2:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 3:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 4:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 5:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 6:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			case 8:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 9:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 10:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 11:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 12:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 13:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
						 NUM_BANKS(ADDR_SURF_16_BANK));
				break;
			case 14:
				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
						 NUM_BANKS(ADDR_SURF_8_BANK));
				break;
			default:
				gb_tile_moden = 0;
				break;
			}
			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
		}
	} else
		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
}
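
/*
 * Illustrative decode (not part of the driver): each GB_TILE_MODE* dword
 * written above is simply an OR of the field encoders, e.g. for the 2D
 * depth mode with a 64B tile split on an 8-pipe part:
 *
 *	u32 mode = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
 *		    MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
 *		    PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
 *		    TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
 *
 * Userspace then selects a surface's tiling by index into this table
 * rather than re-specifying the individual parameters per surface.
 */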
/**
 * cik_select_se_sh - select which SE, SH to address
 *
 * @rdev: radeon_device pointer
 * @se_num: shader engine to address
 * @sh_num: sh block to address
 *
 * Select which SE, SH combinations to address.  Certain
 * registers are instanced per SE or SH.  0xffffffff means
 * broadcast to all SEs or SHs (CIK).
 */
static void cik_select_se_sh(struct radeon_device *rdev,
			     u32 se_num, u32 sh_num)
{
	u32 data = INSTANCE_BROADCAST_WRITES;

	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
		data = SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
	else if (se_num == 0xffffffff)
		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
	else if (sh_num == 0xffffffff)
		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
	else
		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
	WREG32(GRBM_GFX_INDEX, data);
}
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * create a variable length bit mask (CIK).
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	u32 i, mask = 0;

	for (i = 0; i < bit_width; i++) {
		mask <<= 1;
		mask |= 1;
	}
	return mask;
}
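
/*
 * Example (illustrative): cik_create_bitmask(2) returns 0x3 and
 * cik_create_bitmask(4) returns 0xf - i.e. bit_width ones in the low
 * bits, used below to isolate the per-SH render backend bits.
 */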
/**
 * cik_get_rb_disabled - computes the mask of disabled RBs
 *
 * @rdev: radeon_device pointer
 * @max_rb_num: max RBs (render backends) for the asic
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 *
 * Calculates the bitmask of disabled RBs (CIK).
 * Returns the disabled RB bitmask.
 */
static u32 cik_get_rb_disabled(struct radeon_device *rdev,
			       u32 max_rb_num, u32 se_num,
			       u32 sh_per_se)
{
	u32 data, mask;

	data = RREG32(CC_RB_BACKEND_DISABLE);
	data &= BACKEND_DISABLE_MASK;
	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);

	data >>= BACKEND_DISABLE_SHIFT;

	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);

	return data & mask;
}
/**
 * cik_setup_rb - setup the RBs on the asic
 *
 * @rdev: radeon_device pointer
 * @se_num: number of SEs (shader engines) for the asic
 * @sh_per_se: number of SH blocks per SE for the asic
 * @max_rb_num: max RBs (render backends) for the asic
 *
 * Configures per-SE/SH RB registers (CIK).
 */
static void cik_setup_rb(struct radeon_device *rdev,
			 u32 se_num, u32 sh_per_se,
			 u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	for (i = 0; i < se_num; i++) {
		cik_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1:
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2:
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3:
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
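
/*
 * Worked example (illustrative numbers): with 2 SEs, 1 SH per SE and
 * 4 RBs total, each SH contributes 2 bits.  If cik_get_rb_disabled()
 * reports RB 1 of SE 0 disabled, disabled_rbs = 0x2, so
 * enabled_rbs = 0xd, and the final loop above consumes two bits per SH
 * (0x1 -> RB_MAP_0, 0x3 -> RB_MAP_2) to build each SE's
 * PA_SC_RASTER_CONFIG value.
 */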
/**
 * cik_gpu_init - setup the 3D engine
 *
 * @rdev: radeon_device pointer
 *
 * Configures the 3D engine and tiling configuration
 * registers so that the 3D engine is usable.
 */
static void cik_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_BONAIRE:
		rdev->config.cik.max_shader_engines = 2;
		rdev->config.cik.max_tile_pipes = 4;
		rdev->config.cik.max_cu_per_sh = 7;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 2;
		rdev->config.cik.max_texture_channel_caches = 4;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 32;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_KAVERI:
		/* TODO */
		break;
	case CHIP_KABINI:
	default:
		rdev->config.cik.max_shader_engines = 1;
		rdev->config.cik.max_tile_pipes = 2;
		rdev->config.cik.max_cu_per_sh = 2;
		rdev->config.cik.max_sh_per_se = 1;
		rdev->config.cik.max_backends_per_se = 1;
		rdev->config.cik.max_texture_channel_caches = 2;
		rdev->config.cik.max_gprs = 256;
		rdev->config.cik.max_gs_threads = 16;
		rdev->config.cik.max_hw_contexts = 8;

		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
	rdev->config.cik.mem_max_burst_length_bytes = 256;
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.cik.mem_row_size_in_kb > 4)
		rdev->config.cik.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.cik.shader_engine_tile_size = 32;
	rdev->config.cik.num_gpus = 1;
	rdev->config.cik.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.cik.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.cik.tile_config = 0;
	switch (rdev->config.cik.num_tile_pipes) {
	case 2:
		rdev->config.cik.tile_config |= (0 << 0);
		break;
	case 4:
		rdev->config.cik.tile_config |= (1 << 0);
		break;
	case 8:
		rdev->config.cik.tile_config |= (2 << 0);
		break;
	case 16:
	default:
		/* XXX what about 12? */
		rdev->config.cik.tile_config |= (3 << 0);
		break;
	}
	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
		rdev->config.cik.tile_config |= 1 << 4;
	else
		rdev->config.cik.tile_config |= 0 << 4;
	rdev->config.cik.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.cik.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
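
	/*
	 * Illustrative decode of the dword built above (assumption:
	 * consumers unpack it the same way it is packed here):
	 *	num_pipes   = 2 << (tile_config & 0xf);
	 *	bank_flag   = (tile_config >> 4) & 0xf;   - NOOFBANK bit
	 *	group_bits  = (tile_config >> 8) & 0xf;   - pipe interleave
	 *	row_bits    = (tile_config >> 12) & 0xf;  - row size field
	 */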
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);

	cik_tiling_mode_table_init(rdev);

	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
		     rdev->config.cik.max_sh_per_se,
		     rdev->config.cik.max_backends_per_se);

	/* set HW defaults for 3D engine */
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	WREG32(SX_DEBUG_1, 0x20);

	WREG32(TA_CNTL_AUX, 0x00010000);

	tmp = RREG32(SPI_CONFIG_CNTL);
	WREG32(SPI_CONFIG_CNTL, tmp);

	WREG32(SQ_CONFIG, 1);

	WREG32(DB_DEBUG, 0);

	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
	WREG32(DB_DEBUG2, tmp);

	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
	WREG32(DB_DEBUG3, tmp);

	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
	WREG32(CB_HW_CONTROL, tmp);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);

	udelay(50);
}
/*
 * GPU scratch registers helpers function.
 */
/**
 * cik_scratch_init - setup driver info for CP scratch regs
 *
 * @rdev: radeon_device pointer
 *
 * Set up the number and offset of the CP scratch registers.
 * NOTE: use of CP scratch registers is a legacy interface and
 * is not used by default on newer asics (r6xx+).  On newer asics,
 * memory buffers are used for fences rather than scratch regs.
 */
static void cik_scratch_init(struct radeon_device *rdev)
{
	int i;

	rdev->scratch.num_reg = 7;
	rdev->scratch.reg_base = SCRATCH_REG0;
	for (i = 0; i < rdev->scratch.num_reg; i++) {
		rdev->scratch.free[i] = true;
		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
	}
}
/**
 * cik_ring_test - basic gfx ring test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate a scratch register and write to it using the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that the ring is working.
 * Used by cik_cp_gfx_resume();
 * Returns 0 on success, error on failure.
 */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
/**
 * cik_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the gfx ring and flushes
 * GPU caches.
 */
void cik_fence_ring_emit(struct radeon_device *rdev,
			 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	/* We should be using the new WAIT_REG_MEM special op packet here
	 * but it causes the CP to hang
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);
}
void cik_semaphore_ring_emit(struct radeon_device *rdev,
			     struct radeon_ring *ring,
			     struct radeon_semaphore *semaphore,
			     bool emit_wait)
{
	uint64_t addr = semaphore->gpu_addr;
	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;

	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
}
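
/*
 * Pairing sketch (illustrative only): ring-to-ring synchronization emits
 * a SIGNAL packet on the producing ring and a WAIT packet with the same
 * semaphore address on the consuming ring, e.g.:
 *
 *	cik_semaphore_ring_emit(rdev, ring_a, sem, false);	// signal
 *	cik_semaphore_ring_emit(rdev, ring_b, sem, true);	// wait
 *
 * In practice the radeon_semaphore helpers pick the rings and addresses;
 * this is just the packet-level view.
 */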
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
/**
 * cik_ib_test - basic gfx ring IB test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Allocate an IB and execute it on the gfx ring (CIK).
 * Provides a basic gfx ring test to verify that IBs are working.
 * Returns 0 on success, error on failure.
 */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
/**
 * cik_cp_gfx_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the gfx MEs.
 */
static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_ME_CNTL, 0);
	else {
		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
	}
	udelay(50);
}
/**
 * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx PFP, ME, and CE ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
		return -EINVAL;

	cik_cp_gfx_enable(rdev, false);

	/* PFP */
	fw_data = (const __be32 *)rdev->pfp_fw->data;
	WREG32(CP_PFP_UCODE_ADDR, 0);
	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_PFP_UCODE_ADDR, 0);

	/* CE */
	fw_data = (const __be32 *)rdev->ce_fw->data;
	WREG32(CP_CE_UCODE_ADDR, 0);
	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_CE_UCODE_ADDR, 0);

	/* ME */
	fw_data = (const __be32 *)rdev->me_fw->data;
	WREG32(CP_ME_RAM_WADDR, 0);
	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_ME_RAM_WADDR, 0);

	WREG32(CP_PFP_UCODE_ADDR, 0);
	WREG32(CP_CE_UCODE_ADDR, 0);
	WREG32(CP_ME_RAM_WADDR, 0);
	WREG32(CP_ME_RAM_RADDR, 0);
	return 0;
}

/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}

/**
 * cik_cp_gfx_fini - stop the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx ring and tear down the driver ring
 * info (structure etc.) (CIK).
 */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}

/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = drm_order(ring->ring_size / 8);
	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	ring->rptr = RREG32(CP_RB0_RPTR);

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}
	return 0;
}
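
/*
 * Editor's worked example (illustrative numbers): with a 1 MiB gfx
 * ring, rb_bufsz above is drm_order(1048576 / 8) = 17, i.e. the CP
 * sees a ring of 2^17 quadwords.
 */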

u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 rptr;

	if (rdev->wb.enabled) {
		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
	} else {
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		rptr = RREG32(CP_HQD_PQ_RPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
	}
	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

	return rptr;
}

u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
			      struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
	} else {
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
	}
	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;

	return wptr;
}

void cik_compute_ring_set_wptr(struct radeon_device *rdev,
			       struct radeon_ring *ring)
{
	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;

	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
	WDOORBELL32(ring->doorbell_offset, wptr);
}

/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
	udelay(50);
}

/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}

/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}

/**
 * cik_cp_compute_fini - stop the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute queues and tear down the driver queue
 * info (structure etc.) (CIK).
 */
static void cik_cp_compute_fini(struct radeon_device *rdev)
{
	int i, idx, r;

	cik_cp_compute_enable(rdev, false);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj) {
			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
			if (unlikely(r != 0))
				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);

			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
			rdev->ring[idx].mqd_obj = NULL;
		}
	}
}

static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}

struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};

struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_pad;
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
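
/*
 * Editor's note (illustrative, not in the original source): the CP
 * fetches the MQD as a flat dword image from a single GTT page, so the
 * structure above must stay within that page.  A sketch of a
 * compile-time guard one could drop into an init path:
 *
 *	BUILD_BUG_ON(sizeof(struct bonaire_mqd) > PAGE_SIZE);
 */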

/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);

		cik_srbm_select(rdev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

		/* set the VMID assigned */
		WREG32(CP_HPD_EOP_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(CP_HPD_EOP_CONTROL);
		tmp &= ~EOP_SIZE_MASK;
		tmp |= drm_order(MEC_HPD_SIZE / 8);
		WREG32(CP_HPD_EOP_CONTROL, tmp);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, NULL,
					     &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* doorbell offset */
		rdev->ring[idx].doorbell_offset =
			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			drm_order(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);
		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}

static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}

static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;
	r = cik_cp_compute_load_microcode(rdev);
	if (r)
		return r;

	return 0;
}

static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}

static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset all cp blocks */
	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
	RREG32(GRBM_SOFT_RESET);
	mdelay(15);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	return 0;
}

/*
 * sDMA - System DMA
 * Starting with CIK, the GPU has new asynchronous
 * DMA engines.  These engines are used for compute
 * and gfx.  There are two DMA engines (SDMA0, SDMA1)
 * and each one supports 1 ring buffer used for gfx
 * and 2 queues used for compute.
 *
 * The programming model is very similar to the CP
 * (ring buffer, IBs, etc.), but sDMA has its own
 * packet format that is different from the PM4 format
 * used by the CP.  sDMA supports copying data, writing
 * embedded data, solid fills, and a number of other
 * things.  It also has support for tiling/detiling of
 * buffers.
 */
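
/*
 * Editor's illustration (hypothetical helper, not part of the driver):
 * the smallest useful sDMA packet is the 5-dword linear WRITE used by
 * the ring and IB tests further down.  Assumes a ring that has already
 * been locked with at least 5 free dwords.
 */
static inline void cik_sdma_example_write_dword(struct radeon_ring *ring,
						u64 gpu_addr, u32 value)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, gpu_addr & 0xfffffffc);   /* dst addr lo */
	radeon_ring_write(ring, upper_32_bits(gpu_addr)); /* dst addr hi */
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, value);
}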

/**
 * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (CIK).
 */
void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
			      struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;

	if (rdev->wb.enabled) {
		u32 next_rptr = ring->wptr + 5;
		while ((next_rptr & 7) != 4)
			next_rptr++;
		next_rptr += 4;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
		radeon_ring_write(ring, 1); /* number of DWs to follow */
		radeon_ring_write(ring, next_rptr);
	}

	/* IB packet must end on a 8 DW boundary */
	while ((ring->wptr & 7) != 4)
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, ib->length_dw);
}

/**
 * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (CIK).
 */
void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
			      struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	/* write the fence */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
	radeon_ring_write(ring, addr & 0xffffffff);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	radeon_ring_write(ring, fence->seq);
	/* generate an interrupt */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
}

/**
 * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @semaphore: radeon semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring to wait on or signal
 * other rings (CIK).
 */
void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
				  struct radeon_ring *ring,
				  struct radeon_semaphore *semaphore,
				  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
	radeon_ring_write(ring, addr & 0xfffffff8);
	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
}

/**
 * cik_sdma_gfx_stop - stop the gfx async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the gfx async dma ring buffers (CIK).
 */
static void cik_sdma_gfx_stop(struct radeon_device *rdev)
{
	u32 rb_cntl, reg_offset;
	int i;

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
		rb_cntl &= ~SDMA_RB_ENABLE;
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
	}
}

/**
 * cik_sdma_rlc_stop - stop the compute async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the compute async dma queues (CIK).
 */
static void cik_sdma_rlc_stop(struct radeon_device *rdev)
{
	/* XXX todo */
}

/**
 * cik_sdma_enable - stop the async dma engines
 *
 * @rdev: radeon_device pointer
 * @enable: enable/disable the DMA MEs.
 *
 * Halt or unhalt the async dma engines (CIK).
 */
static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
{
	u32 me_cntl, reg_offset;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0)
			reg_offset = SDMA0_REGISTER_OFFSET;
		else
			reg_offset = SDMA1_REGISTER_OFFSET;
		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
		if (enable)
			me_cntl &= ~SDMA_HALT;
		else
			me_cntl |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
	}
}

/**
 * cik_sdma_gfx_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 reg_offset, wb_offset;
	int i, r;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
			reg_offset = SDMA0_REGISTER_OFFSET;
			wb_offset = R600_WB_DMA_RPTR_OFFSET;
		} else {
			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
			reg_offset = SDMA1_REGISTER_OFFSET;
			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
		}

		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = drm_order(ring->ring_size / 4);
		rb_cntl = rb_bufsz << 1;
#ifdef __BIG_ENDIAN
		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
#endif
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);

		/* set the wb address whether it's enabled or not */
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));

		if (rdev->wb.enabled)
			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;

		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;

		/* enable DMA RB */
		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

		ib_cntl = SDMA_IB_ENABLE;
#ifdef __BIG_ENDIAN
		ib_cntl |= SDMA_IB_SWAP_ENABLE;
#endif
		/* enable DMA IBs */
		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);

		ring->ready = true;

		r = radeon_ring_test(rdev, ring->idx, ring);
		if (r) {
			ring->ready = false;
			return r;
		}
	}

	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}

/**
 * cik_sdma_rlc_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the compute DMA queues and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_rlc_resume(struct radeon_device *rdev)
{
	/* XXX todo */
	return 0;
}

/**
 * cik_sdma_load_microcode - load the sDMA ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_sdma_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->sdma_fw)
		return -EINVAL;

	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);

	/* halt the MEs */
	cik_sdma_enable(rdev, false);

	/* sdma0 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	/* sdma1 */
	fw_data = (const __be32 *)rdev->sdma_fw->data;
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);

	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
	return 0;
}

/**
 * cik_sdma_resume - setup and start the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Set up the DMA engines and enable them (CIK).
 * Returns 0 for success, error for failure.
 */
static int cik_sdma_resume(struct radeon_device *rdev)
{
	int r;

	/* Reset dma */
	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
	RREG32(SRBM_SOFT_RESET);
	udelay(50);
	WREG32(SRBM_SOFT_RESET, 0);
	RREG32(SRBM_SOFT_RESET);

	r = cik_sdma_load_microcode(rdev);
	if (r)
		return r;

	/* unhalt the MEs */
	cik_sdma_enable(rdev, true);

	/* start the gfx rings and rlc compute queues */
	r = cik_sdma_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_sdma_rlc_resume(rdev);
	if (r)
		return r;

	return 0;
}

/**
 * cik_sdma_fini - tear down the async dma engines
 *
 * @rdev: radeon_device pointer
 *
 * Stop the async dma engines and free the rings (CIK).
 */
static void cik_sdma_fini(struct radeon_device *rdev)
{
	/* stop the gfx rings and rlc compute queues */
	cik_sdma_gfx_stop(rdev);
	cik_sdma_rlc_stop(rdev);
	/* halt the MEs */
	cik_sdma_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
	/* XXX - compute dma queue tear down */
}

/**
 * cik_copy_dma - copy pages using the DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU pages using the DMA engine (CIK).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_dma(struct radeon_device *rdev,
		 uint64_t src_offset, uint64_t dst_offset,
		 unsigned num_gpu_pages,
		 struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.dma_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	if (radeon_fence_need_sync(*fence, ring->idx)) {
		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
					    ring->idx);
		radeon_fence_note_sync(*fence, ring->idx);
	} else {
		radeon_semaphore_free(rdev, &sem, NULL);
	}

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
		radeon_ring_write(ring, cur_size_in_bytes);
		radeon_ring_write(ring, 0); /* src/dst endian swap */
		radeon_ring_write(ring, src_offset & 0xffffffff);
		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
		radeon_ring_write(ring, dst_offset & 0xfffffffc);
		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
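
/*
 * Editor's worked example (illustrative numbers): copying 2048 GPU
 * pages (2048 << 12 = 0x800000 bytes) against the 0x1fffff-byte copy
 * limit above gives DIV_ROUND_UP(0x800000, 0x1fffff) = 5 COPY packets,
 * so the ring is locked for 5 * 7 + 14 = 49 dwords.
 */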

/**
 * cik_sdma_ring_test - simple async dma engine test
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (CIK).
 * Returns 0 for success, error for failure.
 */
int cik_sdma_ring_test(struct radeon_device *rdev,
		       struct radeon_ring *ring)
{
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ring_lock(rdev, ring, 4);
	if (r) {
		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
		return r;
	}
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring);

	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	return r;
}

/**
 * cik_sdma_ib_test - test an IB on the DMA engine
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	unsigned i;
	int r;
	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
	u32 tmp = 0;

	if (!ptr) {
		DRM_ERROR("invalid vram scratch pointer\n");
		return -EINVAL;
	}

	tmp = 0xCAFEDEAD;
	writel(tmp, ptr);

	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		return r;
	}

	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
	ib.ptr[3] = 1;
	ib.ptr[4] = 0xDEADBEEF;
	ib.length_dw = 5;

	r = radeon_ib_schedule(rdev, &ib, NULL);
	if (r) {
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = readl(ptr);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	radeon_ib_free(rdev, &ib);
	return r;
}

static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		 RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		 RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		 RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		 RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		 RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		 RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		 RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}

/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}

/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}

/**
 * cik_asic_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 *
 * Look up which blocks are hung and attempt
 * to reset them.
 * Returns 0 for success.
 */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}

/**
 * cik_gfx_is_lockup - check if the 3D engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the 3D engine is locked up (CIK).
 * Returns true if the engine is locked, false if not.
 */
bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);

	if (!(reset_mask & (RADEON_RESET_GFX |
			    RADEON_RESET_COMPUTE |
			    RADEON_RESET_CP))) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force CP activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_sdma_is_lockup - Check if the DMA engine is locked up
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Check if the async DMA engine is locked up (CIK).
 * Returns true if the engine appears to be locked up, false if not.
 */
bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
{
	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
	u32 mask;

	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
		mask = RADEON_RESET_DMA;
	else
		mask = RADEON_RESET_DMA1;

	if (!(reset_mask & mask)) {
		radeon_ring_lockup_update(ring);
		return false;
	}
	/* force ring activities */
	radeon_ring_force_activity(rdev, ring);
	return radeon_ring_test_lockup(rdev, ring);
}

/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}

/**
 * cik_mc_init - initialize the memory controller driver params
 *
 * @rdev: radeon_device pointer
 *
 * Look up the amount of vram, vram width, and decide how to place
 * vram and gart within the GPU's physical address space (CIK).
 * Returns 0 for success.
 */
static int cik_mc_init(struct radeon_device *rdev)
{
	u32 tmp;
	int chansize, numchan;

	/* Get VRAM information */
	rdev->mc.vram_is_ddr = true;
	tmp = RREG32(MC_ARB_RAMCFG);
	if (tmp & CHANSIZE_MASK) {
		chansize = 64;
	} else {
		chansize = 32;
	}
	tmp = RREG32(MC_SHARED_CHMAP);
	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		numchan = 1;
		break;
	case 1:
		numchan = 2;
		break;
	case 2:
		numchan = 4;
		break;
	case 3:
		numchan = 8;
		break;
	case 4:
		numchan = 3;
		break;
	case 5:
		numchan = 6;
		break;
	case 6:
		numchan = 10;
		break;
	case 7:
		numchan = 12;
		break;
	case 8:
		numchan = 16;
		break;
	}
	rdev->mc.vram_width = numchan * chansize;
	/* Could aper size report 0 ? */
	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
	/* size in MB */
	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
	rdev->mc.visible_vram_size = rdev->mc.aper_size;
	si_vram_gtt_location(rdev, &rdev->mc);
	radeon_update_bandwidth_info(rdev);

	return 0;
}
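
/*
 * Editor's worked example (illustrative numbers): a 256-bit Bonaire
 * board reports 4 channels of 64 bits each, so vram_width above works
 * out to 4 * 64 = 256.
 */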

/*
 * GART
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_pcie_gart_tlb_flush - gart tlb flush callback
 *
 * @rdev: radeon_device pointer
 *
 * Flush the TLB for the VMID 0 page table (CIK).
 */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}

/**
 * cik_pcie_gart_enable - gart enable
 *
 * @rdev: radeon_device pointer
 *
 * This sets up the TLBs, programs the page tables for VMID0,
 * sets up the hw for VMIDs 1-15 which are allocated on
 * demand, and sets up the global locations for the LDS, GDS,
 * and GPUVM for FSA64 clients (CIK).
 * Returns 0 for success, errors for failure.
 */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	/* TC cache setup ??? */
	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L1_STORE_POLICY, 0);

	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);

	WREG32(TC_CFG_L1_VOLATILE, 0);
	WREG32(TC_CFG_L2_VOLATILE, 0);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}

/**
 * cik_pcie_gart_disable - gart disable
 *
 * @rdev: radeon_device pointer
 *
 * This disables all VM page tables (CIK).
 */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}

/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the driver GART/VM setup (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}

/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}

/*
 * vm
 * VMID 0 is the physical GPU addresses as used by the kernel.
 * VMIDs 1-15 are used for userspace clients and are handled
 * by the radeon vm/hsa code.
 */
/**
 * cik_vm_init - cik vm init callback
 *
 * @rdev: radeon_device pointer
 *
 * Inits cik specific vm parameters (number of VMs, base of vram for
 * VMIDs 1-15) (CIK).
 * Returns 0 for success.
 */
int cik_vm_init(struct radeon_device *rdev)
{
	/* number of VMs */
	rdev->vm_manager.nvm = 16;
	/* base offset of vram pages */
	if (rdev->flags & RADEON_IS_IGP) {
		u64 tmp = RREG32(MC_VM_FB_OFFSET);
		tmp <<= 22;
		rdev->vm_manager.vram_base_offset = tmp;
	} else {
		rdev->vm_manager.vram_base_offset = 0;
	}
	return 0;
}

/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Tear down any asic specific VM setup (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
/**
 * cik_vm_flush - cik vm flush using the CP
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using the CP (CIK).
 */
void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	/* We should be using the WAIT_REG_MEM packet here like in
	 * cik_fence_ring_emit(), but it causes the CP to hang in this
	 * situation, so we do it using the WRITE_DATA packet instead.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
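/* Illustrative sketch (not part of the driver): cik_vm_flush() above picks
 * the page table base register for a VMID from two register blocks --
 * VM_CONTEXT0..7 for VMIDs 0-7 and VM_CONTEXT8..15 for VMIDs 8-15.  This
 * hypothetical helper isolates that computation; the final >>2 converts the
 * byte offset into the dword register index the WRITE_DATA packet expects.
 */
static inline u32 cik_vm_pt_base_reg_index(unsigned int vm_id)
{
	if (vm_id < 8)
		return (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2;
	return (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2;
}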
/**
 * cik_vm_set_page - update the page tables using CP or sDMA
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using CP or sDMA (CIK).
 */
void cik_vm_set_page(struct radeon_device *rdev,
		     struct radeon_ib *ib,
		     uint64_t pe,
		     uint64_t addr, unsigned count,
		     uint32_t incr, uint32_t flags)
{
	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
	uint64_t value;
	unsigned ndw;

	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
		/* CP */
		while (count) {
			ndw = 2 + count * 2;
			if (ndw > 0x3FFE)
				ndw = 0x3FFE;

			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
						    WRITE_DATA_DST_SEL(1));
			ib->ptr[ib->length_dw++] = pe;
			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
				if (flags & RADEON_VM_PAGE_SYSTEM) {
					value = radeon_vm_map_gart(rdev, addr);
					value &= 0xFFFFFFFFFFFFF000ULL;
				} else if (flags & RADEON_VM_PAGE_VALID) {
					value = addr;
				} else {
					value = 0;
				}
				addr += incr;
				value |= r600_flags;
				ib->ptr[ib->length_dw++] = value;
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
			}
		}
	} else {
		/* DMA */
		if (flags & RADEON_VM_PAGE_SYSTEM) {
			while (count) {
				ndw = count * 2;
				if (ndw > 0xFFFFE)
					ndw = 0xFFFFE;

				/* for non-physically contiguous pages (system) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
				ib->ptr[ib->length_dw++] = pe;
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = ndw;
				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
					if (flags & RADEON_VM_PAGE_SYSTEM) {
						value = radeon_vm_map_gart(rdev, addr);
						value &= 0xFFFFFFFFFFFFF000ULL;
					} else if (flags & RADEON_VM_PAGE_VALID) {
						value = addr;
					} else {
						value = 0;
					}
					addr += incr;
					value |= r600_flags;
					ib->ptr[ib->length_dw++] = value;
					ib->ptr[ib->length_dw++] = upper_32_bits(value);
				}
			}
		} else {
			while (count) {
				ndw = count;
				if (ndw > 0x7FFFF)
					ndw = 0x7FFFF;

				if (flags & RADEON_VM_PAGE_VALID)
					value = addr;
				else
					value = 0;
				/* for physically contiguous pages (vram) */
				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
				ib->ptr[ib->length_dw++] = pe; /* dst addr */
				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = value; /* value */
				ib->ptr[ib->length_dw++] = upper_32_bits(value);
				ib->ptr[ib->length_dw++] = incr; /* increment size */
				ib->ptr[ib->length_dw++] = 0;
				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
				pe += ndw * 8;
				addr += ndw * incr;
				count -= ndw;
			}
		}
		while (ib->length_dw & 0x7)
			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
	}
}
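/* Illustrative sketch (not part of the driver): the NOP loop at the end of
 * cik_vm_set_page() pads the IB to a multiple of 8 dwords for the sDMA
 * engine.  This hypothetical helper computes the same pad count directly.
 */
static inline unsigned int cik_sdma_pad_count(unsigned int length_dw)
{
	return (8 - (length_dw & 0x7)) & 0x7; /* NOPs needed for 8-dword alignment */
}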
/**
 * cik_dma_vm_flush - cik vm flush using sDMA
 *
 * @rdev: radeon_device pointer
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (CIK).
 */
void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
	u32 ref_and_mask;

	if (vm == NULL)
		return;

	if (ridx == R600_RING_TYPE_DMA_INDEX)
		ref_and_mask = SDMA0;
	else
		ref_and_mask = SDMA1;

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	if (vm->id < 8) {
		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* update SH_MEM_* regs */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(vm->id));

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
	radeon_ring_write(ring, 1);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, VMID(0));

	/* flush HDP */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
	radeon_ring_write(ring, ref_and_mask); /* MASK */
	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */

	/* flush TLB */
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 1 << vm->id);
}
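/* Illustrative sketch (not part of the driver): the last dword of the
 * POLL_REG_MEM packet above appears to encode the retry count in the high
 * half and the poll interval in the low half, so (4 << 16) | 10 means
 * "poll every 10 units, give up after 4 retries".  The field layout is
 * inferred from the comment on that write, not from a register spec.
 */
static inline u32 cik_sdma_poll_cntl(u32 retry_count, u32 poll_interval)
{
	return (retry_count << 16) | (poll_interval & 0xffff);
}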
/*
 * RLC
 * The RLC is a multi-purpose microengine that handles a
 * variety of functions, the most important of which is
 * the interrupt controller.
 */
/**
 * cik_rlc_stop - stop the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Halt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	int i, j, k;
	u32 mask, tmp;

	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);
	RREG32(CB_CGTT_SCLK_CTRL);

	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	WREG32(RLC_CNTL, 0);

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
/**
 * cik_rlc_start - start the RLC ME
 *
 * @rdev: radeon_device pointer
 *
 * Unhalt the RLC ME (MicroEngine) (CIK).
 */
static void cik_rlc_start(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_CNTL, RLC_ENABLE);

	/* enable interrupts */
	tmp = RREG32(CP_INT_CNTL_RING0);
	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);

	udelay(50);
}
/**
 * cik_rlc_resume - setup the RLC hw
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the RLC registers, load the ucode,
 * and start the RLC (CIK).
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size;
	u32 clear_state_info[3];
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	switch (rdev->family) {
	case CHIP_BONAIRE:
	default:
		size = BONAIRE_RLC_UCODE_SIZE;
		break;
	case CHIP_KAVERI:
		size = KV_RLC_UCODE_SIZE;
		break;
	case CHIP_KABINI:
		size = KB_RLC_UCODE_SIZE;
		break;
	}

	cik_rlc_stop(rdev);

	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
	RREG32(GRBM_SOFT_RESET);
	udelay(50);
	WREG32(GRBM_SOFT_RESET, 0);
	RREG32(GRBM_SOFT_RESET);
	udelay(50);

	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	fw_data = (const __be32 *)rdev->rlc_fw->data;
	WREG32(RLC_GPM_UCODE_ADDR, 0);
	for (i = 0; i < size; i++)
		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(RLC_GPM_UCODE_ADDR, 0);

	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
	clear_state_info[2] = 0;//cik_default_size;
	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
	for (i = 0; i < 3; i++)
		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
	WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
/*
 * Interrupts
 * Starting with r6xx, interrupts are handled via a ring buffer.
 * Ring buffers are areas of GPU accessible memory that the GPU
 * writes interrupt vectors into and the host reads vectors out of.
 * There is a rptr (read pointer) that determines where the
 * host is currently reading, and a wptr (write pointer)
 * which determines where the GPU has written.  When the
 * pointers are equal, the ring is idle.  When the GPU
 * writes vectors to the ring buffer, it increments the
 * wptr.  When there is an interrupt, the host then starts
 * fetching commands and processing them until the pointers are
 * equal again at which point it updates the rptr.
 */
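/* Illustrative sketch (not part of the driver): with the rptr/wptr scheme
 * described above, the ring is idle exactly when the two pointers are
 * equal, and the number of unprocessed bytes is their masked difference.
 */
static inline u32 cik_ih_ring_pending(u32 wptr, u32 rptr, u32 ptr_mask)
{
	return (wptr - rptr) & ptr_mask; /* 0 means the ring is idle */
}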
/**
 * cik_enable_interrupts - Enable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Enable the interrupt ring buffer (CIK).
 */
static void cik_enable_interrupts(struct radeon_device *rdev)
{
	u32 ih_cntl = RREG32(IH_CNTL);
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);

	ih_cntl |= ENABLE_INTR;
	ih_rb_cntl |= IH_RB_ENABLE;
	WREG32(IH_CNTL, ih_cntl);
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	rdev->ih.enabled = true;
}
/**
 * cik_disable_interrupts - Disable the interrupt ring buffer
 *
 * @rdev: radeon_device pointer
 *
 * Disable the interrupt ring buffer (CIK).
 */
static void cik_disable_interrupts(struct radeon_device *rdev)
{
	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
	u32 ih_cntl = RREG32(IH_CNTL);

	ih_rb_cntl &= ~IH_RB_ENABLE;
	ih_cntl &= ~ENABLE_INTR;
	WREG32(IH_RB_CNTL, ih_rb_cntl);
	WREG32(IH_CNTL, ih_cntl);
	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);
	rdev->ih.enabled = false;
	rdev->ih.rptr = 0;
}
/**
 * cik_disable_interrupt_state - Disable all interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Clear all interrupt enable bits used by the driver (CIK).
 */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);
}
/**
 * cik_irq_init - init and enable the interrupt ring
 *
 * @rdev: radeon_device pointer
 *
 * Allocate a ring buffer for the interrupt controller,
 * enable the RLC, disable interrupts, enable the IH
 * ring buffer and enable it (CIK).
 * Called at device load and resume.
 * Returns 0 for success, errors for failure.
 */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	rb_bufsz = drm_order(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
/**
 * cik_irq_set - enable/disable interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Enable interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).
 * Returns 0 for success, errors for failure.
 */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	/* TODO: compute queues! */
	/* CP_ME[1-2]_PIPE[0-3]_INT_CNTL */

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN;
	}

	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	return 0;
}
/**
 * cik_irq_ack - ack interrupt sources
 *
 * @rdev: radeon_device pointer
 *
 * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
 * generated and do not require an explicit ack.
 */
static inline void cik_irq_ack(struct radeon_device *rdev)
{
	u32 tmp;

	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);

	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);

	if (rdev->num_crtc >= 4) {
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->num_crtc >= 6) {
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
	}

	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
		tmp = RREG32(DC_HPD1_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
		tmp = RREG32(DC_HPD2_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
		tmp = RREG32(DC_HPD3_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
		tmp = RREG32(DC_HPD4_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
		tmp = RREG32(DC_HPD5_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
	}
	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
		tmp = RREG32(DC_HPD6_INT_CONTROL);
		tmp |= DC_HPDx_INT_ACK;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK).
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	cik_disable_interrupt_state(rdev);
}
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
/**
 * cik_get_ih_wptr - get the IH ring buffer wptr
 *
 * @rdev: radeon_device pointer
 *
 * Get the IH ring buffer wptr from either the register
 * or the writeback memory buffer (CIK).  Also check for
 * ring buffer overflow and deal with it.
 * Used by cik_irq_process().
 * Returns the value of the wptr.
 */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		/* When a ring buffer overflow happens, start parsing interrupts
		 * from the last not-overwritten vector (wptr + 16).  Hopefully
		 * this should allow us to catch up.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	return (wptr & rdev->ih.ptr_mask);
}
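/* Illustrative sketch (not part of the driver): on overflow,
 * cik_get_ih_wptr() above resumes at the oldest vector that has not been
 * overwritten, i.e. one 16-byte IV entry past the write pointer, wrapped
 * back into the ring.
 */
static inline u32 cik_ih_overflow_rptr(u32 wptr, u32 ptr_mask)
{
	return (wptr + 16) & ptr_mask;
}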
/* CIK IV Ring
 * Each IV ring entry is 128 bits:
 * [7:0]    - interrupt source id
 * [31:8]   - reserved
 * [59:32]  - interrupt source data
 * [63:60]  - reserved
 * [71:64]  - RINGID
 *            CP:
 *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
 *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
 *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
 *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
 *            PIPE_ID - ME0 0=3D
 *                    - ME1&2 compute dispatcher (4 pipes each)
 *            SDMA:
 *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
 *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
 *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
 * [79:72]  - VMID
 * [95:80]  - PASID
 * [127:96] - reserved
 */
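/* Illustrative sketch (not part of the driver): decode of the IV entry
 * fields described above, exactly as cik_irq_process() does inline.  The
 * CP RINGID sub-fields use the same masks as the handler below; the
 * "XXX check the bitfield order!" caveat there applies here too.
 */
static inline void cik_decode_iv_entry(u32 dw0, u32 dw1, u32 dw2,
				       u32 *src_id, u32 *src_data, u32 *ring_id)
{
	*src_id = dw0 & 0xff;        /* [7:0] interrupt source id */
	*src_data = dw1 & 0xfffffff; /* [59:32] interrupt source data */
	*ring_id = dw2 & 0xff;       /* [71:64] RINGID */
}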
/**
 * cik_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Interrupt handler (CIK).  Walk the IH ring,
 * ack interrupts and schedule work to handle
 * interrupt events.
 * Returns irq process return code.
 */
int cik_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u8 me_id, pipe_id, queue_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_reset = false;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = cik_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	cik_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146:
		case 147:
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* GFX RB CP_INT */
		case 177: /* GFX IB CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
			case 2:
				/* XXX compute */
				break;
			}
			break;
		case 184: /* CP Privileged reg access */
			DRM_ERROR("Illegal register access in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				break;
			case 2:
				/* XXX compute */
				break;
			}
			break;
		case 185: /* CP Privileged inst */
			DRM_ERROR("Illegal instruction in command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x60) >> 5;
			pipe_id = (ring_id & 0x18) >> 3;
			queue_id = (ring_id & 0x7) >> 0;
			switch (me_id) {
			case 0:
				/* This results in a full GPU reset, but all we need to do is soft
				 * reset the CP for gfx
				 */
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				break;
			case 2:
				/* XXX compute */
				break;
			}
			break;
		case 224: /* SDMA trap event */
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			DRM_DEBUG("IH: SDMA trap\n");
			switch (me_id) {
			case 0:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			case 1:
				switch (queue_id) {
				case 0:
					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
					break;
				case 1:
					/* XXX compute */
					break;
				case 2:
					/* XXX compute */
					break;
				}
				break;
			}
			break;
		case 241: /* SDMA Privileged inst */
		case 247: /* SDMA Privileged inst */
			DRM_ERROR("Illegal instruction in SDMA command stream\n");
			/* XXX check the bitfield order! */
			me_id = (ring_id & 0x3) >> 0;
			queue_id = (ring_id & 0xc) >> 2;
			switch (me_id) {
			case 0:
				queue_reset = true;
				break;
			case 1:
				/* XXX compute */
				queue_reset = true;
				break;
			case 2:
				/* XXX compute */
				queue_reset = true;
				break;
			}
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_reset)
		schedule_work(&rdev->reset_work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = cik_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
/*
 * startup/shutdown callbacks
 */
/**
 * cik_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called by cik_init() and cik_resume().
 * Returns 0 for success, error for failure.
 */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	if (rdev->flags & RADEON_IS_IGP) {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}
	} else {
		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
		    !rdev->mc_fw) {
			r = cik_init_microcode(rdev);
			if (r) {
				DRM_ERROR("Failed to load firmware!\n");
				return r;
			}
		}

		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);
	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	r = si_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = cik_uvd_resume(rdev);
	if (!r) {
		r = radeon_fence_driver_start_ring(rdev,
						   R600_RING_TYPE_UVD_INDEX);
		if (r)
			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     0, 0xfffff, RADEON_CP_PACKET2);
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size,
				     R600_WB_UVD_RPTR_OFFSET,
				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
				     0, 0xfffff, RADEON_CP_PACKET2);
		if (!r)
			r = r600_uvd_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	return 0;
}
/**
 * cik_resume - resume the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Programs the asic to a functional state (CIK).
 * Called at resume.
 * Returns 0 for success, error for failure.
 */
int cik_resume(struct radeon_device *rdev)
{
	int r;

	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		DRM_ERROR("cik startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;
}
/**
 * cik_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Bring the chip into a state suitable for suspend (CIK).
 * Called at suspend.
 * Returns 0 for success.
 */
int cik_suspend(struct radeon_device *rdev)
{
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);
	cik_sdma_enable(rdev, false);
	r600_uvd_rbc_stop(rdev);
	radeon_uvd_suspend(rdev);
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);

	return 0;
}
/* Plan is to move initialization in that function and use
 * helper function so that radeon_device_init pretty much
 * do nothing more than calling asic specific function. This
 * should also allow to remove a bunch of callback function
 * like vram_info.
 */
/**
 * cik_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Setup asic specific driver variables and program the hw
 * to a functional state (CIK).
 * Called at driver startup.
 * Returns 0 for success, errors for failure.
 */
int cik_init(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* Read BIOS */
	if (!radeon_get_bios(rdev)) {
		if (ASIC_IS_AVIVO(rdev))
			return -EINVAL;
	}
	/* Must be an ATOMBIOS */
	if (!rdev->is_atom_bios) {
		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
		return -EINVAL;
	}
	r = radeon_atombios_init(rdev);
	if (r)
		return r;

	/* Post card if necessary */
	if (!radeon_card_posted(rdev)) {
		if (!rdev->bios) {
			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
			return -EINVAL;
		}
		DRM_INFO("GPU not posted. posting now...\n");
		atom_asic_init(rdev->mode_info.atom_context);
	}
	/* Initialize scratch registers */
	cik_scratch_init(rdev);
	/* Initialize surface registers */
	radeon_surface_init(rdev);
	/* Initialize clocks */
	radeon_get_clock_info(rdev->ddev);

	/* Fence driver */
	r = radeon_fence_driver_init(rdev);
	if (r)
		return r;

	/* initialize memory controller */
	r = cik_mc_init(rdev);
	if (r)
		return r;
	/* Memory manager */
	r = radeon_bo_init(rdev);
	if (r)
		return r;

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 1024 * 1024);
	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	ring->ring_obj = NULL;
	r600_ring_init(rdev, ring, 256 * 1024);

	r = radeon_uvd_init(rdev);
	if (!r) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		ring->ring_obj = NULL;
		r600_ring_init(rdev, ring, 4096);
	}

	rdev->ih.ring_obj = NULL;
	r600_ih_ring_init(rdev, 64 * 1024);

	r = r600_pcie_gart_init(rdev);
	if (r)
		return r;

	rdev->accel_working = true;
	r = cik_startup(rdev);
	if (r) {
		dev_err(rdev->dev, "disabling GPU acceleration\n");
		cik_cp_fini(rdev);
		cik_sdma_fini(rdev);
		cik_irq_fini(rdev);
		si_rlc_fini(rdev);
		cik_mec_fini(rdev);
		radeon_wb_fini(rdev);
		radeon_ib_pool_fini(rdev);
		radeon_vm_manager_fini(rdev);
		radeon_irq_kms_fini(rdev);
		cik_pcie_gart_fini(rdev);
		rdev->accel_working = false;
	}

	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
		DRM_ERROR("radeon: MC ucode required for NI+.\n");
		return -EINVAL;
	}

	return 0;
}
/**
 * cik_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic specific driver variables and program the hw
 * to an idle state (CIK).
 * Called at driver unload.
 */
void cik_fini(struct radeon_device *rdev)
{
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_irq_fini(rdev);
	si_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	radeon_uvd_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
/* display watermark setup */
/**
 * dce8_line_buffer_adjust - Set up the line buffer
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @mode: the current display mode on the selected display
 * controller
 *
 * Set up the line buffer allocation for
 * the selected display controller (CIK).
 * Returns the line buffer size in pixels.
 */
static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode)
{
	u32 tmp;

	/*
	 * Line Buffer Setup
	 * There are 6 line buffers, one for each display controller.
	 * There are 3 partitions per LB. Select the number of partitions
	 * to enable based on the display width.  For display widths larger
	 * than 4096, you need to use 2 display controllers and combine
	 * them using the stereo blender.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (mode->crtc_hdisplay < 1920)
			tmp = 1;
		else if (mode->crtc_hdisplay < 2560)
			tmp = 2;
		else if (mode->crtc_hdisplay < 4096)
			tmp = 0;
		else {
			DRM_DEBUG_KMS("Mode too big for LB!\n");
			tmp = 0;
		}
	} else
		tmp = 1;

	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 1:
			return 1920 * 2;
		case 2:
			return 2560 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
/**
 * cik_get_number_of_dram_channels - get the number of dram channels
 *
 * @rdev: radeon_device pointer
 *
 * Look up the number of video ram channels (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the number of dram channels
 */
static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
{
	u32 tmp = RREG32(MC_SHARED_CHMAP);

	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
	case 0:
	default:
		return 1;
	case 1:
		return 2;
	case 2:
		return 4;
	case 3:
		return 8;
	case 4:
		return 3;
	case 5:
		return 6;
	case 6:
		return 10;
	case 7:
		return 12;
	case 8:
		return 16;
	}
}
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk; /* bandwidth per dram data pin in kHz */
	u32 sclk; /* engine clock in kHz */
	u32 disp_clk; /* display clock in kHz */
	u32 src_width; /* viewport width */
	u32 active_time; /* active display time in ns */
	u32 blank_time; /* blank time in ns */
	bool interlaced; /* mode is interlaced */
	fixed20_12 vsc; /* vertical scale ratio */
	u32 num_heads; /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size; /* line buffer allocated to pipe */
	u32 vtaps; /* vertical scaler taps */
};
/**
 * dce8_dram_bandwidth - get the dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the raw dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth in MBytes/s
 */
static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate raw DRAM Bandwidth */
	fixed20_12 dram_efficiency; /* 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	dram_efficiency.full = dfixed_const(7);
	dram_efficiency.full = dfixed_div(dram_efficiency, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);

	return dfixed_trunc(bandwidth);
}
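/* Illustrative sketch (not part of the driver): the fixed-point math above
 * amounts to dram_channels * 4 bytes per channel * yclk (converted from kHz
 * to MHz) * 0.7 efficiency.  The same value, approximately, in plain
 * integer math:
 */
static inline u32 dce8_dram_bandwidth_approx(u32 dram_channels, u32 yclk_khz)
{
	return (dram_channels * 4 * (yclk_khz / 1000) * 7) / 10; /* MBytes/s */
}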
/**
 * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
 *
 * @wm: watermark calculation data
 *
 * Calculate the dram bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dram bandwidth for display in MBytes/s
 */
static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	/* Calculate DRAM Bandwidth and the part allocated to display. */
	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
	fixed20_12 yclk, dram_channels, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	yclk.full = dfixed_const(wm->yclk);
	yclk.full = dfixed_div(yclk, a);
	dram_channels.full = dfixed_const(wm->dram_channels * 4);
	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
	bandwidth.full = dfixed_mul(dram_channels, yclk);
	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_data_return_bandwidth - get the data return bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the data return bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the data return bandwidth in MBytes/s
 */
static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display Data return Bandwidth */
	fixed20_12 return_efficiency; /* 0.8 */
	fixed20_12 sclk, bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	sclk.full = dfixed_const(wm->sclk);
	sclk.full = dfixed_div(sclk, a);
	a.full = dfixed_const(10);
	return_efficiency.full = dfixed_const(8);
	return_efficiency.full = dfixed_div(return_efficiency, a);
	a.full = dfixed_const(32);
	bandwidth.full = dfixed_mul(a, sclk);
	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_dmif_request_bandwidth - get the dmif bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the dmif bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the dmif bandwidth in MBytes/s
 */
static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the DMIF Request Bandwidth */
	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
	fixed20_12 disp_clk, bandwidth;
	fixed20_12 a, b;

	a.full = dfixed_const(1000);
	disp_clk.full = dfixed_const(wm->disp_clk);
	disp_clk.full = dfixed_div(disp_clk, a);
	a.full = dfixed_const(32);
	b.full = dfixed_mul(a, disp_clk);

	a.full = dfixed_const(10);
	disp_clk_request_efficiency.full = dfixed_const(8);
	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);

	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_available_bandwidth - get the min available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the min available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the min available bandwidth in MBytes/s
 */
static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);

	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
}
/**
 * dce8_average_bandwidth - get the average available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Calculate the average available bandwidth used for display (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the average available bandwidth in MBytes/s
 */
static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
{
	/* Calculate the display mode Average Bandwidth
	 * DisplayMode should contain the source and destination dimensions,
	 * timing, etc.
	 */
	fixed20_12 bpp;
	fixed20_12 line_time;
	fixed20_12 src_width;
	fixed20_12 bandwidth;
	fixed20_12 a;

	a.full = dfixed_const(1000);
	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
	line_time.full = dfixed_div(line_time, a);
	bpp.full = dfixed_const(wm->bytes_per_pixel);
	src_width.full = dfixed_const(wm->src_width);
	bandwidth.full = dfixed_mul(src_width, bpp);
	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
	bandwidth.full = dfixed_div(bandwidth, line_time);

	return dfixed_trunc(bandwidth);
}
/**
 * dce8_latency_watermark - get the latency watermark
 *
 * @wm: watermark calculation data
 *
 * Calculate the latency watermark (CIK).
 * Used for display watermark bandwidth calculations
 * Returns the latency watermark in ns
 */
static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce8_available_bandwidth(wm);
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	lb_fill_bw = min(tmp, dfixed_trunc(b));

	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);
}
/**
 * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
 * average and available dram bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * dram bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
		return true;
	else
		return false;
}
/**
 * dce8_average_bandwidth_vs_available_bandwidth - check
 * average and available bandwidth
 *
 * @wm: watermark calculation data
 *
 * Check if the display average bandwidth fits in the display
 * available bandwidth (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
{
	if (dce8_average_bandwidth(wm) <=
	    (dce8_available_bandwidth(wm) / wm->num_heads))
		return true;
	else
		return false;
}
/**
 * dce8_check_latency_hiding - check latency hiding
 *
 * @wm: watermark calculation data
 *
 * Check latency hiding (CIK).
 * Used for display watermark bandwidth calculations
 * Returns true if the display fits, false if not.
 */
static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
{
	u32 lb_partitions = wm->lb_size / wm->src_width;
	u32 line_time = wm->active_time + wm->blank_time;
	u32 latency_tolerant_lines;
	u32 latency_hiding;
	fixed20_12 a;

	a.full = dfixed_const(1);
	if (wm->vsc.full > a.full)
		latency_tolerant_lines = 1;
	else {
		if (lb_partitions <= (wm->vtaps + 1))
			latency_tolerant_lines = 1;
		else
			latency_tolerant_lines = 2;
	}

	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);

	if (dce8_latency_watermark(wm) <= latency_hiding)
		return true;
	else
		return false;
}
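/*
 * Illustrative example with assumed numbers: for vsc <= 1.0,
 * lb_size = 3840 and src_width = 1920 give lb_partitions = 2; with
 * vtaps = 1 that is <= vtaps + 1, so a single latency tolerant line is
 * assumed and latency_hiding = line_time + blank_time.
 */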
/**
 * dce8_program_watermarks - program display watermarks
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the selected display controller
 * @lb_size: line buffer size
 * @num_heads: number of display controllers in use
 *
 * Calculate and program the display watermarks for the
 * selected display controller (CIK).
 */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		wm.yclk = rdev->pm.current_mclk * 10;
		wm.sclk = rdev->pm.current_sclk * 10;
		wm.disp_clk = mode->clock;
		wm.src_width = mode->crtc_hdisplay;
		wm.active_time = mode->crtc_hdisplay * pixel_period;
		wm.blank_time = line_time - wm.active_time;
		wm.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm.interlaced = true;
		wm.vsc = radeon_crtc->vsc;
		wm.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm.vtaps = 2;
		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm.lb_size = lb_size;
		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
		/* set for low clocks */
		/* wm.yclk = low clk; wm.sclk = low clk */
		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
		    !dce8_check_latency_hiding(&wm) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
}
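/*
 * Design note (descriptive, inferred from the register writes above):
 * the hardware keeps several watermark sets per pipe, and
 * DPG_WATERMARK_MASK_CONTROL selects which set the latency register
 * write lands in. Sets 1 and 2 are programmed back to back for the
 * high and low clock cases, then the original selection is restored so
 * the sequence is transparent to whatever was selected before.
 */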
/**
 * dce8_bandwidth_update - program display watermarks
 *
 * @rdev: radeon_device pointer
 *
 * Calculate and program the display watermarks and line
 * buffer allocation (CIK).
 */
void dce8_bandwidth_update(struct radeon_device *rdev)
{
	struct drm_display_mode *mode = NULL;
	u32 num_heads = 0, lb_size;
	int i;

	radeon_update_display_priority(rdev);

	for (i = 0; i < rdev->num_crtc; i++) {
		if (rdev->mode_info.crtcs[i]->base.enabled)
			num_heads++;
	}
	for (i = 0; i < rdev->num_crtc; i++) {
		mode = &rdev->mode_info.crtcs[i]->base.mode;
		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
	}
}
/**
 * cik_get_gpu_clock_counter - return GPU clock counter snapshot
 *
 * @rdev: radeon_device pointer
 *
 * Fetches a GPU clock counter snapshot (CIK).
 * Returns the 64 bit clock counter snapshot.
 */
uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
{
	uint64_t clock;

	mutex_lock(&rdev->gpu_clock_mutex);
	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
	mutex_unlock(&rdev->gpu_clock_mutex);
	return clock;
}
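/*
 * Hypothetical usage sketch (not part of this file): a caller can take
 * two snapshots around a workload and subtract them, e.g.
 *
 *	uint64_t start = cik_get_gpu_clock_counter(rdev);
 *	... submit and wait for work ...
 *	uint64_t delta = cik_get_gpu_clock_counter(rdev) - start;
 *
 * The mutex keeps the LSB/MSB register pair read atomic with respect
 * to other snapshot takers.
 */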
static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
			     u32 cntl_reg, u32 status_reg)
{
	int r, i;
	struct atom_clock_dividers dividers;
	uint32_t tmp;

	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
					   clock, false, &dividers);
	if (r)
		return r;

	tmp = RREG32_SMC(cntl_reg);
	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
	tmp |= dividers.post_divider;
	WREG32_SMC(cntl_reg, tmp);

	/* wait (up to ~1 second) for the new divider to take effect */
	for (i = 0; i < 100; i++) {
		if (RREG32_SMC(status_reg) & DCLK_STATUS)
			break;
		mdelay(10);
	}
	if (i == 100)
		return -ETIMEDOUT;

	return 0;
}
int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	int r = 0;

	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
	if (r)
		return r;

	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
	return r;
}
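/*
 * Note: both UVD clocks go through the same helper with their
 * respective CG control/status register pairs; a failure while setting
 * vclk short-circuits before dclk is touched, so the engine is never
 * left with only one of the two clocks reprogrammed.
 */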
int cik_uvd_resume(struct radeon_device *rdev)
{
	uint64_t addr;
	uint32_t size;
	int r;

	r = radeon_uvd_resume(rdev);
	if (r)
		return r;

	/* program the VCPU memory controller bits 0-27 */
	addr = rdev->uvd.gpu_addr >> 3;
	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
	WREG32(UVD_VCPU_CACHE_SIZE0, size);

	addr += size;
	size = RADEON_UVD_STACK_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
	WREG32(UVD_VCPU_CACHE_SIZE1, size);

	addr += size;
	size = RADEON_UVD_HEAP_SIZE >> 3;
	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
	WREG32(UVD_VCPU_CACHE_SIZE2, size);

	/* bits 28-31 */
	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));

	/* bits 32-39 */
	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));