/*
 * drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
 * (snapshot from: "drm/amdgpu/gfx: rework fiji cg functions so they can be shared")
 */
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
23 #include <linux/firmware.h>
24 #include "drmP.h"
25 #include "amdgpu.h"
26 #include "amdgpu_gfx.h"
27 #include "vi.h"
28 #include "vid.h"
29 #include "amdgpu_ucode.h"
30 #include "clearstate_vi.h"
31
32 #include "gmc/gmc_8_2_d.h"
33 #include "gmc/gmc_8_2_sh_mask.h"
34
35 #include "oss/oss_3_0_d.h"
36 #include "oss/oss_3_0_sh_mask.h"
37
38 #include "bif/bif_5_0_d.h"
39 #include "bif/bif_5_0_sh_mask.h"
40
41 #include "gca/gfx_8_0_d.h"
42 #include "gca/gfx_8_0_enum.h"
43 #include "gca/gfx_8_0_sh_mask.h"
44 #include "gca/gfx_8_0_enum.h"
45
46 #include "dce/dce_10_0_d.h"
47 #include "dce/dce_10_0_sh_mask.h"
48
49 #define GFX8_NUM_GFX_RINGS 1
50 #define GFX8_NUM_COMPUTE_RINGS 8
51
52 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
53 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
54 #define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003
55
56 #define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
57 #define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
58 #define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
59 #define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
60 #define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
61 #define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
62 #define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
63 #define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
64 #define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
65
66 #define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
67 #define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
68 #define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
69 #define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
70 #define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
71 #define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L
72
73 /* BPM SERDES CMD */
74 #define SET_BPM_SERDES_CMD 1
75 #define CLE_BPM_SERDES_CMD 0
76
77 /* BPM Register Address*/
78 enum {
79 BPM_REG_CGLS_EN = 0, /* Enable/Disable CGLS */
80 BPM_REG_CGLS_ON, /* ON/OFF CGLS: shall be controlled by RLC FW */
81 BPM_REG_CGCG_OVERRIDE, /* Set/Clear CGCG Override */
82 BPM_REG_MGCG_OVERRIDE, /* Set/Clear MGCG Override */
83 BPM_REG_FGCG_OVERRIDE, /* Set/Clear FGCG Override */
84 BPM_REG_FGCG_MAX
85 };
86
87 MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
88 MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
89 MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
90 MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
91 MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
92 MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");
93
94 MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
95 MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
96 MODULE_FIRMWARE("amdgpu/stoney_me.bin");
97 MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
98 MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");
99
100 MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
101 MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
102 MODULE_FIRMWARE("amdgpu/tonga_me.bin");
103 MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
104 MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
105 MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");
106
107 MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
108 MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
109 MODULE_FIRMWARE("amdgpu/topaz_me.bin");
110 MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
111 MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");
112
113 MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
114 MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
115 MODULE_FIRMWARE("amdgpu/fiji_me.bin");
116 MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
117 MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
118 MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");
119
120 static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
121 {
122 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
123 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
124 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
125 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
126 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
127 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
128 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
129 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
130 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
131 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
132 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
133 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
134 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
135 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
136 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
137 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
138 };
139
140 static const u32 golden_settings_tonga_a11[] =
141 {
142 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
143 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
144 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
145 mmGB_GPU_ID, 0x0000000f, 0x00000000,
146 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
147 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
148 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
149 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
150 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
151 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
152 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
153 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
154 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
155 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
156 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
157 };
158
159 static const u32 tonga_golden_common_all[] =
160 {
161 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
162 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
163 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
164 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
165 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
166 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
167 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
168 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
169 };
170
171 static const u32 tonga_mgcg_cgcg_init[] =
172 {
173 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
174 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
175 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
176 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
177 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
178 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
179 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
180 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
181 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
182 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
183 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
184 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
185 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
186 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
187 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
188 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
189 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
190 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
191 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
192 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
193 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
194 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
195 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
196 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
197 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
198 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
199 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
200 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
201 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
202 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
203 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
204 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
205 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
206 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
207 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
208 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
209 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
210 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
211 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
212 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
213 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
214 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
215 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
216 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
217 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
218 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
219 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
220 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
221 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
222 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
223 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
224 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
225 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
226 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
227 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
228 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
229 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
230 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
231 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
232 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
233 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
234 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
235 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
236 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
237 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
238 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
239 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
240 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
241 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
242 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
243 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
244 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
245 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
246 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
247 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
248 };
249
250 static const u32 fiji_golden_common_all[] =
251 {
252 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
253 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
254 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
255 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
256 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
257 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
258 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
259 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
260 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
261 mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
262 };
263
264 static const u32 golden_settings_fiji_a10[] =
265 {
266 mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
267 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
268 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
269 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
270 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
271 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
272 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
273 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
274 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
275 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
276 mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
277 };
278
279 static const u32 fiji_mgcg_cgcg_init[] =
280 {
281 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
282 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
283 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
284 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
285 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
286 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
287 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
288 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
289 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
290 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
291 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
292 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
293 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
294 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
295 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
296 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
297 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
298 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
299 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
300 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
301 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
302 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
303 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
304 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
305 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
306 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
307 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
308 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
309 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
310 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
311 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
312 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
313 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
314 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
315 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
316 };
317
318 static const u32 golden_settings_iceland_a11[] =
319 {
320 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
321 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
322 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
323 mmGB_GPU_ID, 0x0000000f, 0x00000000,
324 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
325 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
326 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
327 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
328 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
329 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
330 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
331 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
332 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
333 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
334 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
335 };
336
337 static const u32 iceland_golden_common_all[] =
338 {
339 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
340 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
341 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
342 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
343 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
344 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
345 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
346 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
347 };
348
349 static const u32 iceland_mgcg_cgcg_init[] =
350 {
351 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
352 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
353 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
354 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
355 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
356 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
357 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
358 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
359 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
360 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
361 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
362 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
363 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
364 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
365 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
366 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
367 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
368 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
369 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
370 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
371 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
372 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
373 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
374 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
375 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
376 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
377 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
378 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
379 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
380 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
381 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
382 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
383 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
384 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
385 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
386 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
387 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
388 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
389 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
390 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
391 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
392 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
393 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
394 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
395 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
396 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
397 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
398 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
399 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
400 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
401 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
402 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
403 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
404 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
405 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
406 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
407 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
408 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
409 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
410 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
411 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
412 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
413 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
414 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
415 };
416
417 static const u32 cz_golden_settings_a11[] =
418 {
419 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
420 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
421 mmGB_GPU_ID, 0x0000000f, 0x00000000,
422 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
423 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
424 mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
425 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
426 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
427 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
428 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
429 };
430
431 static const u32 cz_golden_common_all[] =
432 {
433 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
434 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
435 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
436 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
437 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
438 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
439 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
440 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
441 };
442
443 static const u32 cz_mgcg_cgcg_init[] =
444 {
445 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
446 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
447 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
448 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
449 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
450 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
451 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
452 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
453 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
454 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
455 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
456 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
457 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
458 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
459 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
460 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
461 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
462 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
463 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
464 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
465 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
466 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
467 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
468 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
469 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
470 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
471 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
472 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
473 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
474 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
475 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
476 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
477 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
478 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
479 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
480 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
481 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
482 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
483 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
484 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
485 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
486 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
487 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
488 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
489 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
490 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
491 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
492 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
493 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
494 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
495 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
496 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
497 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
498 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
499 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
500 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
501 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
502 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
503 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
504 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
505 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
506 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
507 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
508 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
509 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
510 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
511 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
512 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
513 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
514 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
515 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
516 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
517 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
518 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
519 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
520 };
521
522 static const u32 stoney_golden_settings_a11[] =
523 {
524 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
525 mmGB_GPU_ID, 0x0000000f, 0x00000000,
526 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
527 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
528 mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
529 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
530 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
531 mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
532 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
533 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
534 };
535
536 static const u32 stoney_golden_common_all[] =
537 {
538 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
539 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
540 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
541 mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
542 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
543 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
544 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
545 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
546 };
547
548 static const u32 stoney_mgcg_cgcg_init[] =
549 {
550 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
551 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
552 mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
553 mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
554 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
555 mmATC_MISC_CG, 0xffffffff, 0x000c0200,
556 };
557
558 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
559 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
560 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
561 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
562
563 static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
564 {
565 switch (adev->asic_type) {
566 case CHIP_TOPAZ:
567 amdgpu_program_register_sequence(adev,
568 iceland_mgcg_cgcg_init,
569 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
570 amdgpu_program_register_sequence(adev,
571 golden_settings_iceland_a11,
572 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
573 amdgpu_program_register_sequence(adev,
574 iceland_golden_common_all,
575 (const u32)ARRAY_SIZE(iceland_golden_common_all));
576 break;
577 case CHIP_FIJI:
578 amdgpu_program_register_sequence(adev,
579 fiji_mgcg_cgcg_init,
580 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
581 amdgpu_program_register_sequence(adev,
582 golden_settings_fiji_a10,
583 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
584 amdgpu_program_register_sequence(adev,
585 fiji_golden_common_all,
586 (const u32)ARRAY_SIZE(fiji_golden_common_all));
587 break;
588
589 case CHIP_TONGA:
590 amdgpu_program_register_sequence(adev,
591 tonga_mgcg_cgcg_init,
592 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
593 amdgpu_program_register_sequence(adev,
594 golden_settings_tonga_a11,
595 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
596 amdgpu_program_register_sequence(adev,
597 tonga_golden_common_all,
598 (const u32)ARRAY_SIZE(tonga_golden_common_all));
599 break;
600 case CHIP_CARRIZO:
601 amdgpu_program_register_sequence(adev,
602 cz_mgcg_cgcg_init,
603 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
604 amdgpu_program_register_sequence(adev,
605 cz_golden_settings_a11,
606 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
607 amdgpu_program_register_sequence(adev,
608 cz_golden_common_all,
609 (const u32)ARRAY_SIZE(cz_golden_common_all));
610 break;
611 case CHIP_STONEY:
612 amdgpu_program_register_sequence(adev,
613 stoney_mgcg_cgcg_init,
614 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
615 amdgpu_program_register_sequence(adev,
616 stoney_golden_settings_a11,
617 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
618 amdgpu_program_register_sequence(adev,
619 stoney_golden_common_all,
620 (const u32)ARRAY_SIZE(stoney_golden_common_all));
621 break;
622 default:
623 break;
624 }
625 }
626
627 static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
628 {
629 int i;
630
631 adev->gfx.scratch.num_reg = 7;
632 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
633 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
634 adev->gfx.scratch.free[i] = true;
635 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
636 }
637 }
638
639 static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
640 {
641 struct amdgpu_device *adev = ring->adev;
642 uint32_t scratch;
643 uint32_t tmp = 0;
644 unsigned i;
645 int r;
646
647 r = amdgpu_gfx_scratch_get(adev, &scratch);
648 if (r) {
649 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
650 return r;
651 }
652 WREG32(scratch, 0xCAFEDEAD);
653 r = amdgpu_ring_alloc(ring, 3);
654 if (r) {
655 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
656 ring->idx, r);
657 amdgpu_gfx_scratch_free(adev, scratch);
658 return r;
659 }
660 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
661 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
662 amdgpu_ring_write(ring, 0xDEADBEEF);
663 amdgpu_ring_commit(ring);
664
665 for (i = 0; i < adev->usec_timeout; i++) {
666 tmp = RREG32(scratch);
667 if (tmp == 0xDEADBEEF)
668 break;
669 DRM_UDELAY(1);
670 }
671 if (i < adev->usec_timeout) {
672 DRM_INFO("ring test on %d succeeded in %d usecs\n",
673 ring->idx, i);
674 } else {
675 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
676 ring->idx, scratch, tmp);
677 r = -EINVAL;
678 }
679 amdgpu_gfx_scratch_free(adev, scratch);
680 return r;
681 }
682
683 static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
684 {
685 struct amdgpu_device *adev = ring->adev;
686 struct amdgpu_ib ib;
687 struct fence *f = NULL;
688 uint32_t scratch;
689 uint32_t tmp = 0;
690 unsigned i;
691 int r;
692
693 r = amdgpu_gfx_scratch_get(adev, &scratch);
694 if (r) {
695 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
696 return r;
697 }
698 WREG32(scratch, 0xCAFEDEAD);
699 memset(&ib, 0, sizeof(ib));
700 r = amdgpu_ib_get(adev, NULL, 256, &ib);
701 if (r) {
702 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
703 goto err1;
704 }
705 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
706 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
707 ib.ptr[2] = 0xDEADBEEF;
708 ib.length_dw = 3;
709
710 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
711 if (r)
712 goto err2;
713
714 r = fence_wait(f, false);
715 if (r) {
716 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
717 goto err2;
718 }
719 for (i = 0; i < adev->usec_timeout; i++) {
720 tmp = RREG32(scratch);
721 if (tmp == 0xDEADBEEF)
722 break;
723 DRM_UDELAY(1);
724 }
725 if (i < adev->usec_timeout) {
726 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
727 ring->idx, i);
728 goto err2;
729 } else {
730 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
731 scratch, tmp);
732 r = -EINVAL;
733 }
734 err2:
735 fence_put(f);
736 amdgpu_ib_free(adev, &ib, NULL);
737 fence_put(f);
738 err1:
739 amdgpu_gfx_scratch_free(adev, scratch);
740 return r;
741 }
742
743 static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
744 {
745 const char *chip_name;
746 char fw_name[30];
747 int err;
748 struct amdgpu_firmware_info *info = NULL;
749 const struct common_firmware_header *header = NULL;
750 const struct gfx_firmware_header_v1_0 *cp_hdr;
751
752 DRM_DEBUG("\n");
753
754 switch (adev->asic_type) {
755 case CHIP_TOPAZ:
756 chip_name = "topaz";
757 break;
758 case CHIP_TONGA:
759 chip_name = "tonga";
760 break;
761 case CHIP_CARRIZO:
762 chip_name = "carrizo";
763 break;
764 case CHIP_FIJI:
765 chip_name = "fiji";
766 break;
767 case CHIP_STONEY:
768 chip_name = "stoney";
769 break;
770 default:
771 BUG();
772 }
773
774 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
775 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
776 if (err)
777 goto out;
778 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
779 if (err)
780 goto out;
781 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
782 adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
783 adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
784
785 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
786 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
787 if (err)
788 goto out;
789 err = amdgpu_ucode_validate(adev->gfx.me_fw);
790 if (err)
791 goto out;
792 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
793 adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
794 adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
795
796 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
797 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
798 if (err)
799 goto out;
800 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
801 if (err)
802 goto out;
803 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
804 adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
805 adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
806
807 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
808 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
809 if (err)
810 goto out;
811 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
812 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data;
813 adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
814 adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
815
816 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
817 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
818 if (err)
819 goto out;
820 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
821 if (err)
822 goto out;
823 cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
824 adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
825 adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
826
827 if ((adev->asic_type != CHIP_STONEY) &&
828 (adev->asic_type != CHIP_TOPAZ)) {
829 snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
830 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
831 if (!err) {
832 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
833 if (err)
834 goto out;
835 cp_hdr = (const struct gfx_firmware_header_v1_0 *)
836 adev->gfx.mec2_fw->data;
837 adev->gfx.mec2_fw_version =
838 le32_to_cpu(cp_hdr->header.ucode_version);
839 adev->gfx.mec2_feature_version =
840 le32_to_cpu(cp_hdr->ucode_feature_version);
841 } else {
842 err = 0;
843 adev->gfx.mec2_fw = NULL;
844 }
845 }
846
847 if (adev->firmware.smu_load) {
848 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
849 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
850 info->fw = adev->gfx.pfp_fw;
851 header = (const struct common_firmware_header *)info->fw->data;
852 adev->firmware.fw_size +=
853 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
854
855 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
856 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
857 info->fw = adev->gfx.me_fw;
858 header = (const struct common_firmware_header *)info->fw->data;
859 adev->firmware.fw_size +=
860 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
861
862 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
863 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
864 info->fw = adev->gfx.ce_fw;
865 header = (const struct common_firmware_header *)info->fw->data;
866 adev->firmware.fw_size +=
867 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
868
869 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
870 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
871 info->fw = adev->gfx.rlc_fw;
872 header = (const struct common_firmware_header *)info->fw->data;
873 adev->firmware.fw_size +=
874 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
875
876 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
877 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
878 info->fw = adev->gfx.mec_fw;
879 header = (const struct common_firmware_header *)info->fw->data;
880 adev->firmware.fw_size +=
881 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
882
883 if (adev->gfx.mec2_fw) {
884 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
885 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
886 info->fw = adev->gfx.mec2_fw;
887 header = (const struct common_firmware_header *)info->fw->data;
888 adev->firmware.fw_size +=
889 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
890 }
891
892 }
893
894 out:
895 if (err) {
896 dev_err(adev->dev,
897 "gfx8: Failed to load firmware \"%s\"\n",
898 fw_name);
899 release_firmware(adev->gfx.pfp_fw);
900 adev->gfx.pfp_fw = NULL;
901 release_firmware(adev->gfx.me_fw);
902 adev->gfx.me_fw = NULL;
903 release_firmware(adev->gfx.ce_fw);
904 adev->gfx.ce_fw = NULL;
905 release_firmware(adev->gfx.rlc_fw);
906 adev->gfx.rlc_fw = NULL;
907 release_firmware(adev->gfx.mec_fw);
908 adev->gfx.mec_fw = NULL;
909 release_firmware(adev->gfx.mec2_fw);
910 adev->gfx.mec2_fw = NULL;
911 }
912 return err;
913 }
914
915 static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
916 {
917 int r;
918
919 if (adev->gfx.mec.hpd_eop_obj) {
920 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
921 if (unlikely(r != 0))
922 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
923 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
924 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
925
926 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
927 adev->gfx.mec.hpd_eop_obj = NULL;
928 }
929 }
930
931 #define MEC_HPD_SIZE 2048
932
933 static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
934 {
935 int r;
936 u32 *hpd;
937
938 /*
939 * we assign only 1 pipe because all other pipes will
940 * be handled by KFD
941 */
942 adev->gfx.mec.num_mec = 1;
943 adev->gfx.mec.num_pipe = 1;
944 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
945
946 if (adev->gfx.mec.hpd_eop_obj == NULL) {
947 r = amdgpu_bo_create(adev,
948 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
949 PAGE_SIZE, true,
950 AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
951 &adev->gfx.mec.hpd_eop_obj);
952 if (r) {
953 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
954 return r;
955 }
956 }
957
958 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
959 if (unlikely(r != 0)) {
960 gfx_v8_0_mec_fini(adev);
961 return r;
962 }
963 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
964 &adev->gfx.mec.hpd_eop_gpu_addr);
965 if (r) {
966 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
967 gfx_v8_0_mec_fini(adev);
968 return r;
969 }
970 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
971 if (r) {
972 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
973 gfx_v8_0_mec_fini(adev);
974 return r;
975 }
976
977 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
978
979 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
980 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
981
982 return 0;
983 }
984
/*
 * Raw GCN ISA dwords for the VGPR-initialization compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds(); copied dword-for-dword into the
 * indirect buffer at vgpr_offset.  NOTE(review): binary blob — do not
 * edit by hand.
 */
static const u32 vgpr_init_compute_shader[] =
{
	0x7e000209, 0x7e020208,
	0x7e040207, 0x7e060206,
	0x7e080205, 0x7e0a0204,
	0x7e0c0203, 0x7e0e0202,
	0x7e100201, 0x7e120200,
	0x7e140209, 0x7e160208,
	0x7e180207, 0x7e1a0206,
	0x7e1c0205, 0x7e1e0204,
	0x7e200203, 0x7e220202,
	0x7e240201, 0x7e260200,
	0x7e280209, 0x7e2a0208,
	0x7e2c0207, 0x7e2e0206,
	0x7e300205, 0x7e320204,
	0x7e340203, 0x7e360202,
	0x7e380201, 0x7e3a0200,
	0x7e3c0209, 0x7e3e0208,
	0x7e400207, 0x7e420206,
	0x7e440205, 0x7e460204,
	0x7e480203, 0x7e4a0202,
	0x7e4c0201, 0x7e4e0200,
	0x7e500209, 0x7e520208,
	0x7e540207, 0x7e560206,
	0x7e580205, 0x7e5a0204,
	0x7e5c0203, 0x7e5e0202,
	0x7e600201, 0x7e620200,
	0x7e640209, 0x7e660208,
	0x7e680207, 0x7e6a0206,
	0x7e6c0205, 0x7e6e0204,
	0x7e700203, 0x7e720202,
	0x7e740201, 0x7e760200,
	0x7e780209, 0x7e7a0208,
	0x7e7c0207, 0x7e7e0206,
	0xbf8a0000, 0xbf810000,
};
1021
/*
 * Raw GCN ISA dwords for the SGPR-initialization compute shader used by
 * gfx_v8_0_do_edc_gpr_workarounds(); copied into the indirect buffer at
 * sgpr_offset and dispatched twice (sgpr1/sgpr2 register sets).
 * NOTE(review): binary blob — do not edit by hand.
 */
static const u32 sgpr_init_compute_shader[] =
{
	0xbe8a0100, 0xbe8c0102,
	0xbe8e0104, 0xbe900106,
	0xbe920108, 0xbe940100,
	0xbe960102, 0xbe980104,
	0xbe9a0106, 0xbe9c0108,
	0xbe9e0100, 0xbea00102,
	0xbea20104, 0xbea40106,
	0xbea60108, 0xbea80100,
	0xbeaa0102, 0xbeac0104,
	0xbeae0106, 0xbeb00108,
	0xbeb20100, 0xbeb40102,
	0xbeb60104, 0xbeb80106,
	0xbeba0108, 0xbebc0100,
	0xbebe0102, 0xbec00104,
	0xbec20106, 0xbec40108,
	0xbec60100, 0xbec80102,
	0xbee60004, 0xbee70005,
	0xbeea0006, 0xbeeb0007,
	0xbee80008, 0xbee90009,
	0xbefc0000, 0xbf8a0000,
	0xbf810000, 0x00000000,
};
1046
/*
 * Register/value pairs programmed via SET_SH_REG before the VGPR-init
 * dispatch in gfx_v8_0_do_edc_gpr_workarounds(); consumed two entries
 * at a time (offset, value).
 */
static const u32 vgpr_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
	mmCOMPUTE_RESOURCE_LIMITS, 0,
	mmCOMPUTE_NUM_THREAD_X, 256*4,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1066
/*
 * Register/value pairs for the first SGPR-init dispatch (CU mask 0x0f on
 * SE0); consumed two entries at a time by
 * gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr1_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1086
/*
 * Register/value pairs for the second SGPR-init dispatch (CU mask 0xf0 on
 * SE0 — the complementary CU set to sgpr1_init_regs); consumed two
 * entries at a time by gfx_v8_0_do_edc_gpr_workarounds().
 */
static const u32 sgpr2_init_regs[] =
{
	mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
	mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
	mmCOMPUTE_NUM_THREAD_X, 256*5,
	mmCOMPUTE_NUM_THREAD_Y, 1,
	mmCOMPUTE_NUM_THREAD_Z, 1,
	mmCOMPUTE_PGM_RSRC2, 20,
	mmCOMPUTE_USER_DATA_0, 0xedcedc00,
	mmCOMPUTE_USER_DATA_1, 0xedcedc01,
	mmCOMPUTE_USER_DATA_2, 0xedcedc02,
	mmCOMPUTE_USER_DATA_3, 0xedcedc03,
	mmCOMPUTE_USER_DATA_4, 0xedcedc04,
	mmCOMPUTE_USER_DATA_5, 0xedcedc05,
	mmCOMPUTE_USER_DATA_6, 0xedcedc06,
	mmCOMPUTE_USER_DATA_7, 0xedcedc07,
	mmCOMPUTE_USER_DATA_8, 0xedcedc08,
	mmCOMPUTE_USER_DATA_9, 0xedcedc09,
};
1106
/*
 * EDC SEC/DED error-counter registers; each is read back once at the end
 * of gfx_v8_0_do_edc_gpr_workarounds() to clear the counters.
 */
static const u32 sec_ded_counter_registers[] =
{
	mmCPC_EDC_ATC_CNT,
	mmCPC_EDC_SCRATCH_CNT,
	mmCPC_EDC_UCODE_CNT,
	mmCPF_EDC_ATC_CNT,
	mmCPF_EDC_ROQ_CNT,
	mmCPF_EDC_TAG_CNT,
	mmCPG_EDC_ATC_CNT,
	mmCPG_EDC_DMA_CNT,
	mmCPG_EDC_TAG_CNT,
	mmDC_EDC_CSINVOC_CNT,
	mmDC_EDC_RESTORE_CNT,
	mmDC_EDC_STATE_CNT,
	mmGDS_EDC_CNT,
	mmGDS_EDC_GRBM_CNT,
	mmGDS_EDC_OA_DED,
	mmSPI_EDC_CNT,
	mmSQC_ATC_EDC_GATCL1_CNT,
	mmSQC_EDC_CNT,
	mmSQ_EDC_DED_CNT,
	mmSQ_EDC_INFO,
	mmSQ_EDC_SEC_CNT,
	mmTCC_EDC_CNT,
	mmTCP_ATC_EDC_GATCL1_CNT,
	mmTCP_EDC_CNT,
	mmTD_EDC_CNT
};
1135
1136 static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1137 {
1138 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1139 struct amdgpu_ib ib;
1140 struct fence *f = NULL;
1141 int r, i;
1142 u32 tmp;
1143 unsigned total_size, vgpr_offset, sgpr_offset;
1144 u64 gpu_addr;
1145
1146 /* only supported on CZ */
1147 if (adev->asic_type != CHIP_CARRIZO)
1148 return 0;
1149
1150 /* bail if the compute ring is not ready */
1151 if (!ring->ready)
1152 return 0;
1153
1154 tmp = RREG32(mmGB_EDC_MODE);
1155 WREG32(mmGB_EDC_MODE, 0);
1156
1157 total_size =
1158 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1159 total_size +=
1160 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1161 total_size +=
1162 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1163 total_size = ALIGN(total_size, 256);
1164 vgpr_offset = total_size;
1165 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1166 sgpr_offset = total_size;
1167 total_size += sizeof(sgpr_init_compute_shader);
1168
1169 /* allocate an indirect buffer to put the commands in */
1170 memset(&ib, 0, sizeof(ib));
1171 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1172 if (r) {
1173 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1174 return r;
1175 }
1176
1177 /* load the compute shaders */
1178 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1179 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1180
1181 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1182 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1183
1184 /* init the ib length to 0 */
1185 ib.length_dw = 0;
1186
1187 /* VGPR */
1188 /* write the register state for the compute dispatch */
1189 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1190 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1191 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1192 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1193 }
1194 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1195 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1196 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1197 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1198 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1199 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1200
1201 /* write dispatch packet */
1202 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1203 ib.ptr[ib.length_dw++] = 8; /* x */
1204 ib.ptr[ib.length_dw++] = 1; /* y */
1205 ib.ptr[ib.length_dw++] = 1; /* z */
1206 ib.ptr[ib.length_dw++] =
1207 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1208
1209 /* write CS partial flush packet */
1210 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1211 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1212
1213 /* SGPR1 */
1214 /* write the register state for the compute dispatch */
1215 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1216 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1217 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1218 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1219 }
1220 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1221 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1222 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1223 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1224 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1225 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1226
1227 /* write dispatch packet */
1228 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1229 ib.ptr[ib.length_dw++] = 8; /* x */
1230 ib.ptr[ib.length_dw++] = 1; /* y */
1231 ib.ptr[ib.length_dw++] = 1; /* z */
1232 ib.ptr[ib.length_dw++] =
1233 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1234
1235 /* write CS partial flush packet */
1236 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1237 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1238
1239 /* SGPR2 */
1240 /* write the register state for the compute dispatch */
1241 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1242 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1243 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1244 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1245 }
1246 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1247 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1248 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1249 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1250 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1251 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1252
1253 /* write dispatch packet */
1254 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1255 ib.ptr[ib.length_dw++] = 8; /* x */
1256 ib.ptr[ib.length_dw++] = 1; /* y */
1257 ib.ptr[ib.length_dw++] = 1; /* z */
1258 ib.ptr[ib.length_dw++] =
1259 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1260
1261 /* write CS partial flush packet */
1262 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1263 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1264
1265 /* shedule the ib on the ring */
1266 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
1267 if (r) {
1268 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1269 goto fail;
1270 }
1271
1272 /* wait for the GPU to finish processing the IB */
1273 r = fence_wait(f, false);
1274 if (r) {
1275 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1276 goto fail;
1277 }
1278
1279 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1280 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1281 WREG32(mmGB_EDC_MODE, tmp);
1282
1283 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1284 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1285 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1286
1287
1288 /* read back registers to clear the counters */
1289 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1290 RREG32(sec_ded_counter_registers[i]);
1291
1292 fail:
1293 fence_put(f);
1294 amdgpu_ib_free(adev, &ib, NULL);
1295 fence_put(f);
1296
1297 return r;
1298 }
1299
/*
 * gfx_v8_0_gpu_early_init() - derive the GFX topology and GB_ADDR_CONFIG.
 *
 * Fills adev->gfx.config with per-ASIC limits (shader engines, tile
 * pipes, CUs per SH, backends, caches, fifo sizes), picks the golden
 * GB_ADDR_CONFIG value, then adjusts its ROW_SIZE field from the memory
 * row size read out of the memory controller (DIMM mapping fuses on
 * APUs, MC_ARB_RAMCFG on dGPUs).
 */
static void gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
{
	u32 gb_addr_config;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
	u32 tmp;

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_cu_per_sh = 6;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_FIJI:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 16;
		adev->gfx.config.max_cu_per_sh = 16;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 4;
		adev->gfx.config.max_texture_channel_caches = 16;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		/* Fiji reuses the Tonga golden address config */
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_TONGA:
		adev->gfx.config.max_shader_engines = 4;
		adev->gfx.config.max_tile_pipes = 8;
		adev->gfx.config.max_cu_per_sh = 8;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 8;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_CARRIZO:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;

		/* CU count depends on the Carrizo SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc4:
		case 0x84:
		case 0xc8:
		case 0xcc:
		case 0xe1:
		case 0xe3:
			/* B10 */
			adev->gfx.config.max_cu_per_sh = 8;
			break;
		case 0xc5:
		case 0x81:
		case 0x85:
		case 0xc9:
		case 0xcd:
		case 0xe2:
		case 0xe4:
			/* B8 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc6:
		case 0xca:
		case 0xce:
		case 0x88:
			/* B6 */
			adev->gfx.config.max_cu_per_sh = 6;
			break;
		case 0xc7:
		case 0x87:
		case 0xcb:
		case 0xe5:
		case 0x89:
		default:
			/* B4 */
			adev->gfx.config.max_cu_per_sh = 4;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_STONEY:
		adev->gfx.config.max_shader_engines = 1;
		adev->gfx.config.max_tile_pipes = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 1;

		/* CU count depends on the Stoney SKU (PCI revision) */
		switch (adev->pdev->revision) {
		case 0xc0:
		case 0xc1:
		case 0xc2:
		case 0xc4:
		case 0xc8:
		case 0xc9:
			adev->gfx.config.max_cu_per_sh = 3;
			break;
		case 0xd0:
		case 0xd1:
		case 0xd2:
		default:
			adev->gfx.config.max_cu_per_sh = 2;
			break;
		}

		adev->gfx.config.max_texture_channel_caches = 2;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 16;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		/* Stoney reuses the Carrizo golden address config */
		gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
		break;
	default:
		adev->gfx.config.max_shader_engines = 2;
		adev->gfx.config.max_tile_pipes = 4;
		adev->gfx.config.max_cu_per_sh = 2;
		adev->gfx.config.max_sh_per_se = 1;
		adev->gfx.config.max_backends_per_se = 2;
		adev->gfx.config.max_texture_channel_caches = 4;
		adev->gfx.config.max_gprs = 256;
		adev->gfx.config.max_gs_threads = 32;
		adev->gfx.config.max_hw_contexts = 8;

		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
		adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* NOTE(review): mc_shared_chmap is read but never used below —
	 * confirm whether the readback is required or can be dropped */
	mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
	adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
	mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;

	adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
	adev->gfx.config.mem_max_burst_length_bytes = 256;
	if (adev->flags & AMD_IS_APU) {
		/* Get memory bank mapping mode. */
		tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
		dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
		dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
		dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);

		/* Validate settings in case only one DIMM installed. */
		if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
			dimm00_addr_map = 0;
		if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
			dimm01_addr_map = 0;
		if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
			dimm10_addr_map = 0;
		if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
			dimm11_addr_map = 0;

		/* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
		/* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
		if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
			adev->gfx.config.mem_row_size_in_kb = 2;
		else
			adev->gfx.config.mem_row_size_in_kb = 1;
	} else {
		/* dGPU: derive row size from the column count, capped at 4KB */
		tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
		adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
		if (adev->gfx.config.mem_row_size_in_kb > 4)
			adev->gfx.config.mem_row_size_in_kb = 4;
	}

	adev->gfx.config.shader_engine_tile_size = 32;
	adev->gfx.config.num_gpus = 1;
	adev->gfx.config.multi_gpu_tile_size = 64;

	/* fix up row size */
	switch (adev->gfx.config.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
		break;
	case 2:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
		break;
	case 4:
		gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
		break;
	}
	adev->gfx.config.gb_addr_config = gb_addr_config;
}
1525
/*
 * gfx_v8_0_sw_init() - software-side setup for the GFX8 block.
 *
 * Registers the EOP / privileged-register / privileged-instruction
 * interrupt sources, loads the microcode, allocates the MEC HPD EOP
 * buffer, initializes the gfx and compute rings, reserves the GDS/GWS/OA
 * BOs and finally derives the GPU config.  The order matters: microcode
 * must be available before MEC init, and rings need the EOP irq source.
 *
 * Returns 0 on success or the first failing step's negative error code.
 * NOTE(review): on a late failure, resources acquired by earlier steps
 * are not rolled back here — presumably sw_fini handles that; confirm.
 */
static int gfx_v8_0_sw_init(void *handle)
{
	int i, r;
	struct amdgpu_ring *ring;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* EOP Event */
	r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
	if (r)
		return r;

	/* Privileged reg */
	r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
	if (r)
		return r;

	/* Privileged inst */
	r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
	if (r)
		return r;

	adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;

	gfx_v8_0_scratch_init(adev);

	r = gfx_v8_0_init_microcode(adev);
	if (r) {
		DRM_ERROR("Failed to load gfx firmware!\n");
		return r;
	}

	r = gfx_v8_0_mec_init(adev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* set up the gfx ring */
	for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
		ring = &adev->gfx.gfx_ring[i];
		ring->ring_obj = NULL;
		sprintf(ring->name, "gfx");
		/* no gfx doorbells on iceland */
		if (adev->asic_type != CHIP_TOPAZ) {
			ring->use_doorbell = true;
			ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
		}

		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
				     AMDGPU_RING_TYPE_GFX);
		if (r)
			return r;
	}

	/* set up the compute queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		unsigned irq_type;

		/* max 32 queues per MEC */
		if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
			DRM_ERROR("Too many (%d) compute rings!\n", i);
			break;
		}
		ring = &adev->gfx.compute_ring[i];
		ring->ring_obj = NULL;
		ring->use_doorbell = true;
		ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
		ring->me = 1; /* first MEC */
		/* 8 queues per pipe: map ring index -> pipe.queue */
		ring->pipe = i / 8;
		ring->queue = i % 8;
		sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
		irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
		/* type-2 packets are deprecated on MEC, use type-3 instead */
		r = amdgpu_ring_init(adev, ring, 1024,
				     PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
				     &adev->gfx.eop_irq, irq_type,
				     AMDGPU_RING_TYPE_COMPUTE);
		if (r)
			return r;
	}

	/* reserve GDS, GWS and OA resource for gfx */
	r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
			NULL, &adev->gds.gds_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
		PAGE_SIZE, true,
		AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
		NULL, &adev->gds.gws_gfx_bo);
	if (r)
		return r;

	r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
			PAGE_SIZE, true,
			AMDGPU_GEM_DOMAIN_OA, 0, NULL,
			NULL, &adev->gds.oa_gfx_bo);
	if (r)
		return r;

	adev->gfx.ce_ram_size = 0x8000;

	gfx_v8_0_gpu_early_init(adev);

	return 0;
}
1637
1638 static int gfx_v8_0_sw_fini(void *handle)
1639 {
1640 int i;
1641 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1642
1643 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
1644 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
1645 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
1646
1647 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
1648 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
1649 for (i = 0; i < adev->gfx.num_compute_rings; i++)
1650 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
1651
1652 gfx_v8_0_mec_fini(adev);
1653
1654 return 0;
1655 }
1656
1657 static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
1658 {
1659 uint32_t *modearray, *mod2array;
1660 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
1661 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
1662 u32 reg_offset;
1663
1664 modearray = adev->gfx.config.tile_mode_array;
1665 mod2array = adev->gfx.config.macrotile_mode_array;
1666
1667 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1668 modearray[reg_offset] = 0;
1669
1670 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1671 mod2array[reg_offset] = 0;
1672
1673 switch (adev->asic_type) {
1674 case CHIP_TOPAZ:
1675 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1676 PIPE_CONFIG(ADDR_SURF_P2) |
1677 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1678 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1679 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1680 PIPE_CONFIG(ADDR_SURF_P2) |
1681 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1682 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1683 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1684 PIPE_CONFIG(ADDR_SURF_P2) |
1685 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1686 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1687 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1688 PIPE_CONFIG(ADDR_SURF_P2) |
1689 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1690 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1691 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1692 PIPE_CONFIG(ADDR_SURF_P2) |
1693 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1694 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1695 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1696 PIPE_CONFIG(ADDR_SURF_P2) |
1697 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1698 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1699 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1700 PIPE_CONFIG(ADDR_SURF_P2) |
1701 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1702 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1703 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1704 PIPE_CONFIG(ADDR_SURF_P2));
1705 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1706 PIPE_CONFIG(ADDR_SURF_P2) |
1707 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1708 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1709 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1710 PIPE_CONFIG(ADDR_SURF_P2) |
1711 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1712 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1713 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1714 PIPE_CONFIG(ADDR_SURF_P2) |
1715 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1716 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1717 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1718 PIPE_CONFIG(ADDR_SURF_P2) |
1719 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1720 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1721 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1722 PIPE_CONFIG(ADDR_SURF_P2) |
1723 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1724 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1725 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1726 PIPE_CONFIG(ADDR_SURF_P2) |
1727 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1728 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1729 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1730 PIPE_CONFIG(ADDR_SURF_P2) |
1731 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1732 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1733 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1734 PIPE_CONFIG(ADDR_SURF_P2) |
1735 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1736 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1737 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1738 PIPE_CONFIG(ADDR_SURF_P2) |
1739 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1740 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1741 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1742 PIPE_CONFIG(ADDR_SURF_P2) |
1743 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1744 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1745 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1746 PIPE_CONFIG(ADDR_SURF_P2) |
1747 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1748 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1749 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1750 PIPE_CONFIG(ADDR_SURF_P2) |
1751 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1752 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1753 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1754 PIPE_CONFIG(ADDR_SURF_P2) |
1755 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1756 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1757 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1758 PIPE_CONFIG(ADDR_SURF_P2) |
1759 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1760 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1761 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1762 PIPE_CONFIG(ADDR_SURF_P2) |
1763 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1764 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1765 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1766 PIPE_CONFIG(ADDR_SURF_P2) |
1767 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1768 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1769 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1770 PIPE_CONFIG(ADDR_SURF_P2) |
1771 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1772 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1773 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1774 PIPE_CONFIG(ADDR_SURF_P2) |
1775 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1776 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1777
1778 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1779 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1780 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1781 NUM_BANKS(ADDR_SURF_8_BANK));
1782 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1783 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1784 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1785 NUM_BANKS(ADDR_SURF_8_BANK));
1786 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1787 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1788 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1789 NUM_BANKS(ADDR_SURF_8_BANK));
1790 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1791 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1792 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1793 NUM_BANKS(ADDR_SURF_8_BANK));
1794 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1795 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1796 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1797 NUM_BANKS(ADDR_SURF_8_BANK));
1798 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1799 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1800 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1801 NUM_BANKS(ADDR_SURF_8_BANK));
1802 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1803 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1804 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1805 NUM_BANKS(ADDR_SURF_8_BANK));
1806 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1807 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1808 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1809 NUM_BANKS(ADDR_SURF_16_BANK));
1810 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1811 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1812 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1813 NUM_BANKS(ADDR_SURF_16_BANK));
1814 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1815 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1816 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1817 NUM_BANKS(ADDR_SURF_16_BANK));
1818 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1819 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1820 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1821 NUM_BANKS(ADDR_SURF_16_BANK));
1822 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1823 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1824 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1825 NUM_BANKS(ADDR_SURF_16_BANK));
1826 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1827 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1828 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1829 NUM_BANKS(ADDR_SURF_16_BANK));
1830 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1831 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1832 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1833 NUM_BANKS(ADDR_SURF_8_BANK));
1834
1835 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
1836 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
1837 reg_offset != 23)
1838 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
1839
1840 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
1841 if (reg_offset != 7)
1842 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
1843
1844 break;
1845 case CHIP_FIJI:
/*
 * Fiji tiling tables: every PIPE_CONFIG uses the 16-pipe
 * ADDR_SURF_P16_32x32_16x16 layout, except tile-mode slots
 * 7/12/17/23 and slot 30, which are the 4-pipe (ADDR_SURF_P4_16x16)
 * PRT variants.  Unlike the 2-pipe ASICs below, all 31 tile-mode
 * slots are populated, so the GB_TILE_MODE write loop has no skip
 * list.  (num_tile_mode_states/num_secondary_tile_mode_states are
 * set earlier in this function, outside this hunk.)
 */
1846 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1847 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1848 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1849 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1850 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1851 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1852 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1853 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1854 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1855 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1856 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1857 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1858 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1860 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1861 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1862 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1863 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1864 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1865 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1866 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1867 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1868 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1869 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1870 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1871 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1872 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1873 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
/* slot 7: 4-pipe PRT depth variant (P4_16x16) */
1874 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1875 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1876 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1877 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1878 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1879 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
1880 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1881 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1882 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1883 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1884 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1885 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1886 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1887 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1888 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1889 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1890 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1891 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 12: 4-pipe PRT display variant (P4_16x16) */
1892 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1893 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1894 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1895 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1896 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1897 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1898 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1899 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1900 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1901 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1902 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1903 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1904 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1905 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1906 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1907 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1908 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1909 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1910 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1911 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 17: 4-pipe PRT thin variant (P4_16x16) */
1912 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1913 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1914 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1915 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1916 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1917 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1918 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1919 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1920 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1921 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1922 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1923 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1924 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1925 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1926 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1927 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1928 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1929 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1930 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1931 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1932 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1933 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1934 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* slot 23: 4-pipe PRT thick variant (P4_16x16) */
1936 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1937 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1938 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1940 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1941 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1942 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1944 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1945 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1946 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1948 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1949 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1950 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1952 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1953 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1954 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1956 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1957 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1958 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1960 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1961 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
1962 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 30: 4-pipe PRT rotated variant (P4_16x16) */
1964 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1965 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1966 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1968
/* Macrotile (bank width/height/aspect/count) table; slot 7 is left
 * unprogrammed and skipped by the write loop below. */
1969 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1970 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1971 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1972 NUM_BANKS(ADDR_SURF_8_BANK));
1973 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1974 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1975 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1976 NUM_BANKS(ADDR_SURF_8_BANK));
1977 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1978 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1979 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1980 NUM_BANKS(ADDR_SURF_8_BANK));
1981 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1982 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1983 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1984 NUM_BANKS(ADDR_SURF_8_BANK));
1985 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1986 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1987 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1988 NUM_BANKS(ADDR_SURF_8_BANK));
1989 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1990 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1991 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1992 NUM_BANKS(ADDR_SURF_8_BANK));
1993 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1994 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1995 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1996 NUM_BANKS(ADDR_SURF_8_BANK));
1997 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1998 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1999 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2000 NUM_BANKS(ADDR_SURF_8_BANK));
2001 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2002 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2003 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2004 NUM_BANKS(ADDR_SURF_8_BANK));
2005 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2006 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2007 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2008 NUM_BANKS(ADDR_SURF_8_BANK));
2009 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2010 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2011 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2012 NUM_BANKS(ADDR_SURF_8_BANK));
2013 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2014 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2015 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2016 NUM_BANKS(ADDR_SURF_8_BANK));
2017 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2018 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2019 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2020 NUM_BANKS(ADDR_SURF_8_BANK));
2021 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2024 NUM_BANKS(ADDR_SURF_4_BANK));
2025
/* Program every tile-mode register (all 31 slots are valid on Fiji);
 * skip macrotile slot 7, which has no table entry. */
2026 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2027 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2028
2029 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2030 if (reg_offset != 7)
2031 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2032
2033 break;
2034 case CHIP_TONGA:
/*
 * Tonga tiling tables: same structure as the Fiji arm above, but
 * built around the 8-pipe ADDR_SURF_P8_32x32_16x16 layout.  Slots
 * 7/12/17/23/30 again carry the 4-pipe (ADDR_SURF_P4_16x16) PRT
 * variants, so all 31 tile-mode slots are programmed; only
 * macrotile slot 7 is skipped.
 */
2035 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2036 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2037 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2038 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2039 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2040 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2041 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2042 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2043 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2044 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2045 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2046 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2047 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2048 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2049 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2050 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2051 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2052 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2053 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2054 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2055 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2056 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2057 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2058 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2059 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2060 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2061 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2062 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
/* slot 7: 4-pipe PRT depth variant (P4_16x16) */
2063 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2064 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2065 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2066 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2067 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2068 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2069 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2070 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2071 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2072 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2073 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2074 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2075 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2076 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2077 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2078 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2079 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2080 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 12: 4-pipe PRT display variant (P4_16x16) */
2081 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2082 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2083 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2084 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2085 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2086 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2087 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2088 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2089 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2090 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2091 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2092 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2094 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2095 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2096 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2097 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2098 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2099 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2100 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 17: 4-pipe PRT thin variant (P4_16x16) */
2101 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2102 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2103 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2104 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2105 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2106 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2107 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2108 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2109 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2110 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2111 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2112 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2113 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2114 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2115 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2116 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2117 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2118 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2119 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2120 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2121 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2122 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2123 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2124 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* slot 23: 4-pipe PRT thick variant (P4_16x16) */
2125 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2126 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2127 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2128 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2129 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2130 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2131 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2132 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2133 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2134 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2135 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2136 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2137 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2138 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2139 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2140 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2141 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2142 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2143 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2144 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2145 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2146 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2147 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2148 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2149 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2150 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2151 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2152 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 30: 4-pipe PRT rotated variant (P4_16x16) */
2153 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2154 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2155 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2156 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2157
/* Macrotile table; slot 7 is left unprogrammed and skipped below. */
2158 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2159 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2160 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2161 NUM_BANKS(ADDR_SURF_16_BANK));
2162 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2163 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2164 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2165 NUM_BANKS(ADDR_SURF_16_BANK));
2166 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2167 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2168 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2169 NUM_BANKS(ADDR_SURF_16_BANK));
2170 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2171 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2172 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2173 NUM_BANKS(ADDR_SURF_16_BANK));
2174 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2175 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2176 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2177 NUM_BANKS(ADDR_SURF_16_BANK));
2178 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2179 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2180 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2181 NUM_BANKS(ADDR_SURF_16_BANK));
2182 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2183 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2184 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2185 NUM_BANKS(ADDR_SURF_16_BANK));
2186 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2187 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2188 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2189 NUM_BANKS(ADDR_SURF_16_BANK));
2190 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2191 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2192 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2193 NUM_BANKS(ADDR_SURF_16_BANK));
2194 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2195 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2196 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2197 NUM_BANKS(ADDR_SURF_16_BANK));
2198 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2199 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2200 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2201 NUM_BANKS(ADDR_SURF_16_BANK));
2202 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2203 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2204 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2205 NUM_BANKS(ADDR_SURF_8_BANK));
2206 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2207 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2208 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2209 NUM_BANKS(ADDR_SURF_4_BANK));
2210 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2211 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2212 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2213 NUM_BANKS(ADDR_SURF_4_BANK));
2214
/* All tile-mode slots are valid on Tonga; only macrotile slot 7
 * is skipped. */
2215 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2216 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2217
2218 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2219 if (reg_offset != 7)
2220 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2221
2222 break;
2223 case CHIP_STONEY:
/*
 * Stoney tiling tables: 2-pipe (ADDR_SURF_P2) APU layout.  Unlike
 * the Fiji/Tonga arms above there are no 4-pipe PRT variants, so
 * modearray slots 7, 12, 17 and 23 are never assigned, and the
 * tile-mode write loop below explicitly skips those offsets (and
 * macrotile slot 7) to avoid programming uninitialized entries.
 */
2224 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 PIPE_CONFIG(ADDR_SURF_P2) |
2226 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2228 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2229 PIPE_CONFIG(ADDR_SURF_P2) |
2230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2232 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2233 PIPE_CONFIG(ADDR_SURF_P2) |
2234 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2236 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 PIPE_CONFIG(ADDR_SURF_P2) |
2238 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2240 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2241 PIPE_CONFIG(ADDR_SURF_P2) |
2242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2244 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2245 PIPE_CONFIG(ADDR_SURF_P2) |
2246 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2248 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2249 PIPE_CONFIG(ADDR_SURF_P2) |
2250 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2251 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
/* slot 7 intentionally unset (no 4-pipe variant on this ASIC) */
2252 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2253 PIPE_CONFIG(ADDR_SURF_P2));
2254 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2255 PIPE_CONFIG(ADDR_SURF_P2) |
2256 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2257 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2258 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 PIPE_CONFIG(ADDR_SURF_P2) |
2260 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2261 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2262 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2263 PIPE_CONFIG(ADDR_SURF_P2) |
2264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 12 intentionally unset */
2266 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2267 PIPE_CONFIG(ADDR_SURF_P2) |
2268 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2269 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2270 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2271 PIPE_CONFIG(ADDR_SURF_P2) |
2272 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2273 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2274 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2275 PIPE_CONFIG(ADDR_SURF_P2) |
2276 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2278 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 PIPE_CONFIG(ADDR_SURF_P2) |
2280 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2281 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
/* slot 17 intentionally unset */
2282 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2283 PIPE_CONFIG(ADDR_SURF_P2) |
2284 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2285 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2286 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2287 PIPE_CONFIG(ADDR_SURF_P2) |
2288 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2290 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2291 PIPE_CONFIG(ADDR_SURF_P2) |
2292 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2293 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2294 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2295 PIPE_CONFIG(ADDR_SURF_P2) |
2296 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2297 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2298 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2299 PIPE_CONFIG(ADDR_SURF_P2) |
2300 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
/* slot 23 intentionally unset */
2302 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2303 PIPE_CONFIG(ADDR_SURF_P2) |
2304 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2305 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2306 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2307 PIPE_CONFIG(ADDR_SURF_P2) |
2308 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2310 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2311 PIPE_CONFIG(ADDR_SURF_P2) |
2312 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2314 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2315 PIPE_CONFIG(ADDR_SURF_P2) |
2316 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2318 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P2) |
2320 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2322 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P2) |
2324 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2326
/* Macrotile table; slot 7 is left unprogrammed and skipped below. */
2327 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2328 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2329 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2330 NUM_BANKS(ADDR_SURF_8_BANK));
2331 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2332 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2333 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2334 NUM_BANKS(ADDR_SURF_8_BANK));
2335 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2336 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2337 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2338 NUM_BANKS(ADDR_SURF_8_BANK));
2339 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2340 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2341 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2342 NUM_BANKS(ADDR_SURF_8_BANK));
2343 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2345 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2346 NUM_BANKS(ADDR_SURF_8_BANK));
2347 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2348 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2349 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2350 NUM_BANKS(ADDR_SURF_8_BANK));
2351 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2352 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2353 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2354 NUM_BANKS(ADDR_SURF_8_BANK));
2355 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2356 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2357 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2358 NUM_BANKS(ADDR_SURF_16_BANK));
2359 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2360 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2361 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2362 NUM_BANKS(ADDR_SURF_16_BANK));
2363 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2364 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2366 NUM_BANKS(ADDR_SURF_16_BANK));
2367 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2368 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2369 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2370 NUM_BANKS(ADDR_SURF_16_BANK));
2371 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2372 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2373 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2374 NUM_BANKS(ADDR_SURF_16_BANK));
2375 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2376 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2377 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2378 NUM_BANKS(ADDR_SURF_16_BANK));
2379 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2382 NUM_BANKS(ADDR_SURF_8_BANK));
2383
/* Skip the tile-mode slots that were never assigned above
 * (7/12/17/23), and macrotile slot 7. */
2384 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2385 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2386 reg_offset != 23)
2387 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2388
2389 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2390 if (reg_offset != 7)
2391 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2392
2393 break;
2394 default:
2395 dev_warn(adev->dev,
2396 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
2397 adev->asic_type);
2398
2399 case CHIP_CARRIZO:
2400 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2401 PIPE_CONFIG(ADDR_SURF_P2) |
2402 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2403 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2404 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2405 PIPE_CONFIG(ADDR_SURF_P2) |
2406 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2407 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2408 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 PIPE_CONFIG(ADDR_SURF_P2) |
2410 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2411 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2412 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2413 PIPE_CONFIG(ADDR_SURF_P2) |
2414 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2415 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2416 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2417 PIPE_CONFIG(ADDR_SURF_P2) |
2418 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2419 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2420 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2421 PIPE_CONFIG(ADDR_SURF_P2) |
2422 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2423 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2424 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P2) |
2426 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2427 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2428 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2429 PIPE_CONFIG(ADDR_SURF_P2));
2430 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2431 PIPE_CONFIG(ADDR_SURF_P2) |
2432 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2433 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2435 PIPE_CONFIG(ADDR_SURF_P2) |
2436 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2437 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2438 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2439 PIPE_CONFIG(ADDR_SURF_P2) |
2440 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2441 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2442 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2443 PIPE_CONFIG(ADDR_SURF_P2) |
2444 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2445 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2447 PIPE_CONFIG(ADDR_SURF_P2) |
2448 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2449 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2450 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2451 PIPE_CONFIG(ADDR_SURF_P2) |
2452 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2453 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2454 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2455 PIPE_CONFIG(ADDR_SURF_P2) |
2456 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2457 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2458 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2459 PIPE_CONFIG(ADDR_SURF_P2) |
2460 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2462 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2463 PIPE_CONFIG(ADDR_SURF_P2) |
2464 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2465 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2466 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2467 PIPE_CONFIG(ADDR_SURF_P2) |
2468 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2469 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2470 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2471 PIPE_CONFIG(ADDR_SURF_P2) |
2472 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2473 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2474 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2475 PIPE_CONFIG(ADDR_SURF_P2) |
2476 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2477 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2478 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2479 PIPE_CONFIG(ADDR_SURF_P2) |
2480 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2481 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2482 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2483 PIPE_CONFIG(ADDR_SURF_P2) |
2484 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2485 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2486 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2487 PIPE_CONFIG(ADDR_SURF_P2) |
2488 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2489 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2490 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2491 PIPE_CONFIG(ADDR_SURF_P2) |
2492 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2493 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2494 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2495 PIPE_CONFIG(ADDR_SURF_P2) |
2496 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2497 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2498 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 PIPE_CONFIG(ADDR_SURF_P2) |
2500 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2501 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2502
2503 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2506 NUM_BANKS(ADDR_SURF_8_BANK));
2507 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2509 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2510 NUM_BANKS(ADDR_SURF_8_BANK));
2511 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2512 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2513 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2514 NUM_BANKS(ADDR_SURF_8_BANK));
2515 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2518 NUM_BANKS(ADDR_SURF_8_BANK));
2519 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2521 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2522 NUM_BANKS(ADDR_SURF_8_BANK));
2523 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2524 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2525 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2526 NUM_BANKS(ADDR_SURF_8_BANK));
2527 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2530 NUM_BANKS(ADDR_SURF_8_BANK));
2531 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2532 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2533 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2534 NUM_BANKS(ADDR_SURF_16_BANK));
2535 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2536 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2538 NUM_BANKS(ADDR_SURF_16_BANK));
2539 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2540 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2541 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2542 NUM_BANKS(ADDR_SURF_16_BANK));
2543 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2544 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2545 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2546 NUM_BANKS(ADDR_SURF_16_BANK));
2547 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2548 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2549 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2550 NUM_BANKS(ADDR_SURF_16_BANK));
2551 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2554 NUM_BANKS(ADDR_SURF_16_BANK));
2555 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2558 NUM_BANKS(ADDR_SURF_8_BANK));
2559
2560 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2561 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2562 reg_offset != 23)
2563 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2564
2565 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2566 if (reg_offset != 7)
2567 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2568
2569 break;
2570 }
2571 }
2572
2573 void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
2574 {
2575 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
2576
2577 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
2578 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2579 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2580 } else if (se_num == 0xffffffff) {
2581 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2582 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
2583 } else if (sh_num == 0xffffffff) {
2584 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
2585 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2586 } else {
2587 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
2588 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
2589 }
2590 WREG32(mmGRBM_GFX_INDEX, data);
2591 }
2592
2593 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
2594 {
2595 return (u32)((1ULL << bit_width) - 1);
2596 }
2597
2598 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
2599 {
2600 u32 data, mask;
2601
2602 data = RREG32(mmCC_RB_BACKEND_DISABLE);
2603 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
2604
2605 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
2606 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
2607
2608 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
2609 adev->gfx.config.max_sh_per_se);
2610
2611 return (~data) & mask;
2612 }
2613
2614 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
2615 {
2616 int i, j;
2617 u32 data;
2618 u32 active_rbs = 0;
2619 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
2620 adev->gfx.config.max_sh_per_se;
2621
2622 mutex_lock(&adev->grbm_idx_mutex);
2623 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2624 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2625 gfx_v8_0_select_se_sh(adev, i, j);
2626 data = gfx_v8_0_get_rb_active_bitmap(adev);
2627 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
2628 rb_bitmap_width_per_sh);
2629 }
2630 }
2631 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2632 mutex_unlock(&adev->grbm_idx_mutex);
2633
2634 adev->gfx.config.backend_enable_mask = active_rbs;
2635 adev->gfx.config.num_rbs = hweight32(active_rbs);
2636 }
2637
/**
 * gfx_v8_0_init_compute_vmid - init the SH_MEM registers for compute vmids
 *
 * @adev: amdgpu_device pointer
 *
 * Initialize compute vmid sh_mem registers
 *
 */
2646 #define DEFAULT_SH_MEM_BASES (0x6000)
2647 #define FIRST_COMPUTE_VMID (8)
2648 #define LAST_COMPUTE_VMID (16)
2649 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
2650 {
2651 int i;
2652 uint32_t sh_mem_config;
2653 uint32_t sh_mem_bases;
2654
2655 /*
2656 * Configure apertures:
2657 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
2658 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
2659 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
2660 */
2661 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
2662
2663 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
2664 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
2665 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
2666 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
2667 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
2668 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
2669
2670 mutex_lock(&adev->srbm_mutex);
2671 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
2672 vi_srbm_select(adev, 0, 0, 0, i);
2673 /* CP and shaders */
2674 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
2675 WREG32(mmSH_MEM_APE1_BASE, 1);
2676 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2677 WREG32(mmSH_MEM_BASES, sh_mem_bases);
2678 }
2679 vi_srbm_select(adev, 0, 0, 0, 0);
2680 mutex_unlock(&adev->srbm_mutex);
2681 }
2682
2683 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
2684 {
2685 u32 tmp;
2686 int i;
2687
2688 tmp = RREG32(mmGRBM_CNTL);
2689 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
2690 WREG32(mmGRBM_CNTL, tmp);
2691
2692 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2693 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
2694 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
2695
2696 gfx_v8_0_tiling_mode_table_init(adev);
2697
2698 gfx_v8_0_setup_rb(adev);
2699
2700 /* XXX SH_MEM regs */
2701 /* where to put LDS, scratch, GPUVM in FSA64 space */
2702 mutex_lock(&adev->srbm_mutex);
2703 for (i = 0; i < 16; i++) {
2704 vi_srbm_select(adev, 0, 0, 0, i);
2705 /* CP and shaders */
2706 if (i == 0) {
2707 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2708 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2709 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2710 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2711 WREG32(mmSH_MEM_CONFIG, tmp);
2712 } else {
2713 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2714 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2715 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
2716 SH_MEM_ALIGNMENT_MODE_UNALIGNED);
2717 WREG32(mmSH_MEM_CONFIG, tmp);
2718 }
2719
2720 WREG32(mmSH_MEM_APE1_BASE, 1);
2721 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2722 WREG32(mmSH_MEM_BASES, 0);
2723 }
2724 vi_srbm_select(adev, 0, 0, 0, 0);
2725 mutex_unlock(&adev->srbm_mutex);
2726
2727 gfx_v8_0_init_compute_vmid(adev);
2728
2729 mutex_lock(&adev->grbm_idx_mutex);
2730 /*
2731 * making sure that the following register writes will be broadcasted
2732 * to all the shaders
2733 */
2734 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2735
2736 WREG32(mmPA_SC_FIFO_SIZE,
2737 (adev->gfx.config.sc_prim_fifo_size_frontend <<
2738 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2739 (adev->gfx.config.sc_prim_fifo_size_backend <<
2740 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2741 (adev->gfx.config.sc_hiz_tile_fifo_size <<
2742 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2743 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2744 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2745 mutex_unlock(&adev->grbm_idx_mutex);
2746
2747 }
2748
2749 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2750 {
2751 u32 i, j, k;
2752 u32 mask;
2753
2754 mutex_lock(&adev->grbm_idx_mutex);
2755 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2756 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2757 gfx_v8_0_select_se_sh(adev, i, j);
2758 for (k = 0; k < adev->usec_timeout; k++) {
2759 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2760 break;
2761 udelay(1);
2762 }
2763 }
2764 }
2765 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2766 mutex_unlock(&adev->grbm_idx_mutex);
2767
2768 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2769 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2770 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2771 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2772 for (k = 0; k < adev->usec_timeout; k++) {
2773 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2774 break;
2775 udelay(1);
2776 }
2777 }
2778
2779 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2780 bool enable)
2781 {
2782 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2783
2784 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
2785 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
2786 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
2787 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
2788
2789 WREG32(mmCP_INT_CNTL_RING0, tmp);
2790 }
2791
2792 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2793 {
2794 u32 tmp = RREG32(mmRLC_CNTL);
2795
2796 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2797 WREG32(mmRLC_CNTL, tmp);
2798
2799 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2800
2801 gfx_v8_0_wait_for_rlc_serdes(adev);
2802 }
2803
/* Pulse the RLC soft-reset bit in GRBM to put the RLC in a known state. */
static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
{
	u32 tmp = RREG32(mmGRBM_SOFT_RESET);

	/* assert reset, give the hardware time to latch it */
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
	/* deassert, wait again before the RLC is touched */
	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
	WREG32(mmGRBM_SOFT_RESET, tmp);
	udelay(50);
}
2815
2816 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2817 {
2818 u32 tmp = RREG32(mmRLC_CNTL);
2819
2820 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2821 WREG32(mmRLC_CNTL, tmp);
2822
2823 /* carrizo do enable cp interrupt after cp inited */
2824 if (!(adev->flags & AMD_IS_APU))
2825 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2826
2827 udelay(50);
2828 }
2829
2830 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2831 {
2832 const struct rlc_firmware_header_v2_0 *hdr;
2833 const __le32 *fw_data;
2834 unsigned i, fw_size;
2835
2836 if (!adev->gfx.rlc_fw)
2837 return -EINVAL;
2838
2839 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2840 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2841
2842 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2843 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2844 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2845
2846 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2847 for (i = 0; i < fw_size; i++)
2848 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2849 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2850
2851 return 0;
2852 }
2853
/*
 * Full RLC bringup: halt the RLC, disable clock and power gating,
 * soft-reset the block, (re)load its microcode unless the SMU handles
 * firmware loading, then restart it.
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
{
	int r;

	gfx_v8_0_rlc_stop(adev);

	/* disable CG */
	WREG32(mmRLC_CGCG_CGLS_CTRL, 0);

	/* disable PG */
	WREG32(mmRLC_PG_CNTL, 0);

	gfx_v8_0_rlc_reset(adev);

	/* with powerplay enabled, firmware loading is handled elsewhere */
	if (!adev->pp_enabled) {
		if (!adev->firmware.smu_load) {
			/* legacy rlc firmware loading */
			r = gfx_v8_0_rlc_load_microcode(adev);
			if (r)
				return r;
		} else {
			/* SMU loads the fw; just wait for it to finish */
			r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
					AMDGPU_UCODE_ID_RLC_G);
			if (r)
				return -EINVAL;
		}
	}

	gfx_v8_0_rlc_start(adev);

	return 0;
}
2886
2887 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2888 {
2889 int i;
2890 u32 tmp = RREG32(mmCP_ME_CNTL);
2891
2892 if (enable) {
2893 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2894 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2895 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2896 } else {
2897 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2898 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2899 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2900 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2901 adev->gfx.gfx_ring[i].ready = false;
2902 }
2903 WREG32(mmCP_ME_CNTL, tmp);
2904 udelay(50);
2905 }
2906
2907 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2908 {
2909 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2910 const struct gfx_firmware_header_v1_0 *ce_hdr;
2911 const struct gfx_firmware_header_v1_0 *me_hdr;
2912 const __le32 *fw_data;
2913 unsigned i, fw_size;
2914
2915 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2916 return -EINVAL;
2917
2918 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2919 adev->gfx.pfp_fw->data;
2920 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2921 adev->gfx.ce_fw->data;
2922 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2923 adev->gfx.me_fw->data;
2924
2925 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2926 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2927 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2928
2929 gfx_v8_0_cp_gfx_enable(adev, false);
2930
2931 /* PFP */
2932 fw_data = (const __le32 *)
2933 (adev->gfx.pfp_fw->data +
2934 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2935 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2936 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2937 for (i = 0; i < fw_size; i++)
2938 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2939 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2940
2941 /* CE */
2942 fw_data = (const __le32 *)
2943 (adev->gfx.ce_fw->data +
2944 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2945 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2946 WREG32(mmCP_CE_UCODE_ADDR, 0);
2947 for (i = 0; i < fw_size; i++)
2948 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2949 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2950
2951 /* ME */
2952 fw_data = (const __le32 *)
2953 (adev->gfx.me_fw->data +
2954 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2955 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2956 WREG32(mmCP_ME_RAM_WADDR, 0);
2957 for (i = 0; i < fw_size; i++)
2958 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2959 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2960
2961 return 0;
2962 }
2963
2964 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2965 {
2966 u32 count = 0;
2967 const struct cs_section_def *sect = NULL;
2968 const struct cs_extent_def *ext = NULL;
2969
2970 /* begin clear state */
2971 count += 2;
2972 /* context control state */
2973 count += 3;
2974
2975 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2976 for (ext = sect->section; ext->extent != NULL; ++ext) {
2977 if (sect->id == SECT_CONTEXT)
2978 count += 2 + ext->reg_count;
2979 else
2980 return 0;
2981 }
2982 }
2983 /* pa_sc_raster_config/pa_sc_raster_config1 */
2984 count += 4;
2985 /* end clear state */
2986 count += 2;
2987 /* clear state */
2988 count += 2;
2989
2990 return count;
2991 }
2992
/*
 * Submit the clear-state preamble on the gfx ring: context control, the
 * golden context register state from vi_cs_data, per-ASIC
 * PA_SC_RASTER_CONFIG values, and the CE partition setup.  The packet
 * stream must stay in sync with gfx_v8_0_get_csb_size().
 * Returns 0 on success or the error from amdgpu_ring_alloc().
 */
static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;
	int r, i;

	/* init the CP */
	WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
	WREG32(mmCP_ENDIAN_SWAP, 0);
	WREG32(mmCP_DEVICE_ID, 1);

	gfx_v8_0_cp_gfx_enable(adev, true);

	/* reserve room for the whole clear state plus 4 extra dwords */
	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* clear state buffer */
	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	amdgpu_ring_write(ring, 0x80000000);
	amdgpu_ring_write(ring, 0x80000000);

	/* emit the golden register values for every SECT_CONTEXT section */
	for (sect = vi_cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				amdgpu_ring_write(ring,
				       PACKET3(PACKET3_SET_CONTEXT_REG,
					       ext->reg_count));
				amdgpu_ring_write(ring,
				       ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					amdgpu_ring_write(ring, ext->extent[i]);
			}
		}
	}

	/* per-ASIC PA_SC_RASTER_CONFIG/PA_SC_RASTER_CONFIG1 golden values */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
		amdgpu_ring_write(ring, 0x16000012);
		amdgpu_ring_write(ring, 0x0000002A);
		break;
	case CHIP_FIJI:
		amdgpu_ring_write(ring, 0x3a00161a);
		amdgpu_ring_write(ring, 0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		amdgpu_ring_write(ring, 0x00000002);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	case CHIP_STONEY:
		amdgpu_ring_write(ring, 0x00000000);
		amdgpu_ring_write(ring, 0x00000000);
		break;
	default:
		BUG();
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	amdgpu_ring_write(ring, 0);

	/* init the CE partitions */
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	amdgpu_ring_write(ring, 0x8000);
	amdgpu_ring_write(ring, 0x8000);

	amdgpu_ring_commit(ring);

	return 0;
}
3075
/*
 * Bring up gfx ring 0: program the ring buffer registers (size, rptr
 * writeback address, base), configure doorbells where the ASIC has
 * them, then start the ring and run the ring test.
 * Returns 0 on success or the ring-test error.
 */
static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr, rptr_addr;
	int r;

	/* Set the write pointer delay */
	WREG32(mmCP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(mmCP_RB_VMID, 0);

	/* Set ring buffer size */
	ring = &adev->gfx.gfx_ring[0];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
#endif
	WREG32(mmCP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
	ring->wptr = 0;
	WREG32(mmCP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
	WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);

	mdelay(1);
	WREG32(mmCP_RB0_CNTL, tmp);

	/* ring base register is in units of 256 bytes */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(mmCP_RB0_BASE, rb_addr);
	WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* no gfx doorbells on iceland */
	if (adev->asic_type != CHIP_TOPAZ) {
		tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
		if (ring->use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
					DOORBELL_EN, 0);
		}
		WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);

		if (adev->asic_type == CHIP_TONGA) {
			tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
					DOORBELL_RANGE_LOWER,
					AMDGPU_DOORBELL_GFX_RING0);
			WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);

			WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
			       CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
		}

	}

	/* start the ring */
	gfx_v8_0_cp_gfx_start(adev);
	ring->ready = true;
	r = amdgpu_ring_test_ring(ring);
	if (r) {
		ring->ready = false;
		return r;
	}

	return 0;
}
3156
3157 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
3158 {
3159 int i;
3160
3161 if (enable) {
3162 WREG32(mmCP_MEC_CNTL, 0);
3163 } else {
3164 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
3165 for (i = 0; i < adev->gfx.num_compute_rings; i++)
3166 adev->gfx.compute_ring[i].ready = false;
3167 }
3168 udelay(50);
3169 }
3170
3171 static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
3172 {
3173 const struct gfx_firmware_header_v1_0 *mec_hdr;
3174 const __le32 *fw_data;
3175 unsigned i, fw_size;
3176
3177 if (!adev->gfx.mec_fw)
3178 return -EINVAL;
3179
3180 gfx_v8_0_cp_compute_enable(adev, false);
3181
3182 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
3183 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
3184
3185 fw_data = (const __le32 *)
3186 (adev->gfx.mec_fw->data +
3187 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
3188 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
3189
3190 /* MEC1 */
3191 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
3192 for (i = 0; i < fw_size; i++)
3193 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
3194 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
3195
3196 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
3197 if (adev->gfx.mec2_fw) {
3198 const struct gfx_firmware_header_v1_0 *mec2_hdr;
3199
3200 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
3201 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
3202
3203 fw_data = (const __le32 *)
3204 (adev->gfx.mec2_fw->data +
3205 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
3206 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
3207
3208 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
3209 for (i = 0; i < fw_size; i++)
3210 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
3211 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
3212 }
3213
3214 return 0;
3215 }
3216
3217 struct vi_mqd {
3218 uint32_t header; /* ordinal0 */
3219 uint32_t compute_dispatch_initiator; /* ordinal1 */
3220 uint32_t compute_dim_x; /* ordinal2 */
3221 uint32_t compute_dim_y; /* ordinal3 */
3222 uint32_t compute_dim_z; /* ordinal4 */
3223 uint32_t compute_start_x; /* ordinal5 */
3224 uint32_t compute_start_y; /* ordinal6 */
3225 uint32_t compute_start_z; /* ordinal7 */
3226 uint32_t compute_num_thread_x; /* ordinal8 */
3227 uint32_t compute_num_thread_y; /* ordinal9 */
3228 uint32_t compute_num_thread_z; /* ordinal10 */
3229 uint32_t compute_pipelinestat_enable; /* ordinal11 */
3230 uint32_t compute_perfcount_enable; /* ordinal12 */
3231 uint32_t compute_pgm_lo; /* ordinal13 */
3232 uint32_t compute_pgm_hi; /* ordinal14 */
3233 uint32_t compute_tba_lo; /* ordinal15 */
3234 uint32_t compute_tba_hi; /* ordinal16 */
3235 uint32_t compute_tma_lo; /* ordinal17 */
3236 uint32_t compute_tma_hi; /* ordinal18 */
3237 uint32_t compute_pgm_rsrc1; /* ordinal19 */
3238 uint32_t compute_pgm_rsrc2; /* ordinal20 */
3239 uint32_t compute_vmid; /* ordinal21 */
3240 uint32_t compute_resource_limits; /* ordinal22 */
3241 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
3242 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
3243 uint32_t compute_tmpring_size; /* ordinal25 */
3244 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
3245 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
3246 uint32_t compute_restart_x; /* ordinal28 */
3247 uint32_t compute_restart_y; /* ordinal29 */
3248 uint32_t compute_restart_z; /* ordinal30 */
3249 uint32_t compute_thread_trace_enable; /* ordinal31 */
3250 uint32_t compute_misc_reserved; /* ordinal32 */
3251 uint32_t compute_dispatch_id; /* ordinal33 */
3252 uint32_t compute_threadgroup_id; /* ordinal34 */
3253 uint32_t compute_relaunch; /* ordinal35 */
3254 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
3255 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
3256 uint32_t compute_wave_restore_control; /* ordinal38 */
3257 uint32_t reserved9; /* ordinal39 */
3258 uint32_t reserved10; /* ordinal40 */
3259 uint32_t reserved11; /* ordinal41 */
3260 uint32_t reserved12; /* ordinal42 */
3261 uint32_t reserved13; /* ordinal43 */
3262 uint32_t reserved14; /* ordinal44 */
3263 uint32_t reserved15; /* ordinal45 */
3264 uint32_t reserved16; /* ordinal46 */
3265 uint32_t reserved17; /* ordinal47 */
3266 uint32_t reserved18; /* ordinal48 */
3267 uint32_t reserved19; /* ordinal49 */
3268 uint32_t reserved20; /* ordinal50 */
3269 uint32_t reserved21; /* ordinal51 */
3270 uint32_t reserved22; /* ordinal52 */
3271 uint32_t reserved23; /* ordinal53 */
3272 uint32_t reserved24; /* ordinal54 */
3273 uint32_t reserved25; /* ordinal55 */
3274 uint32_t reserved26; /* ordinal56 */
3275 uint32_t reserved27; /* ordinal57 */
3276 uint32_t reserved28; /* ordinal58 */
3277 uint32_t reserved29; /* ordinal59 */
3278 uint32_t reserved30; /* ordinal60 */
3279 uint32_t reserved31; /* ordinal61 */
3280 uint32_t reserved32; /* ordinal62 */
3281 uint32_t reserved33; /* ordinal63 */
3282 uint32_t reserved34; /* ordinal64 */
3283 uint32_t compute_user_data_0; /* ordinal65 */
3284 uint32_t compute_user_data_1; /* ordinal66 */
3285 uint32_t compute_user_data_2; /* ordinal67 */
3286 uint32_t compute_user_data_3; /* ordinal68 */
3287 uint32_t compute_user_data_4; /* ordinal69 */
3288 uint32_t compute_user_data_5; /* ordinal70 */
3289 uint32_t compute_user_data_6; /* ordinal71 */
3290 uint32_t compute_user_data_7; /* ordinal72 */
3291 uint32_t compute_user_data_8; /* ordinal73 */
3292 uint32_t compute_user_data_9; /* ordinal74 */
3293 uint32_t compute_user_data_10; /* ordinal75 */
3294 uint32_t compute_user_data_11; /* ordinal76 */
3295 uint32_t compute_user_data_12; /* ordinal77 */
3296 uint32_t compute_user_data_13; /* ordinal78 */
3297 uint32_t compute_user_data_14; /* ordinal79 */
3298 uint32_t compute_user_data_15; /* ordinal80 */
3299 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
3300 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
3301 uint32_t reserved35; /* ordinal83 */
3302 uint32_t reserved36; /* ordinal84 */
3303 uint32_t reserved37; /* ordinal85 */
3304 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
3305 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
3306 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
3307 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
3308 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
3309 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
3310 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
3311 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
3312 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
3313 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
3314 uint32_t reserved38; /* ordinal96 */
3315 uint32_t reserved39; /* ordinal97 */
3316 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
3317 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
3318 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
3319 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
3320 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
3321 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
3322 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
3323 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
3324 uint32_t reserved40; /* ordinal106 */
3325 uint32_t reserved41; /* ordinal107 */
3326 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
3327 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
3328 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
3329 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
3330 uint32_t reserved42; /* ordinal112 */
3331 uint32_t reserved43; /* ordinal113 */
3332 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
3333 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
3334 uint32_t cp_packet_id_lo; /* ordinal116 */
3335 uint32_t cp_packet_id_hi; /* ordinal117 */
3336 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
3337 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
3338 uint32_t gds_save_base_addr_lo; /* ordinal120 */
3339 uint32_t gds_save_base_addr_hi; /* ordinal121 */
3340 uint32_t gds_save_mask_lo; /* ordinal122 */
3341 uint32_t gds_save_mask_hi; /* ordinal123 */
3342 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
3343 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
3344 uint32_t reserved44; /* ordinal126 */
3345 uint32_t reserved45; /* ordinal127 */
3346 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
3347 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
3348 uint32_t cp_hqd_active; /* ordinal130 */
3349 uint32_t cp_hqd_vmid; /* ordinal131 */
3350 uint32_t cp_hqd_persistent_state; /* ordinal132 */
3351 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
3352 uint32_t cp_hqd_queue_priority; /* ordinal134 */
3353 uint32_t cp_hqd_quantum; /* ordinal135 */
3354 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
3355 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
3356 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
3357 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
3358 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
3359 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
3360 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
3361 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
3362 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
3363 uint32_t cp_hqd_pq_control; /* ordinal145 */
3364 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
3365 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
3366 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
3367 uint32_t cp_hqd_ib_control; /* ordinal149 */
3368 uint32_t cp_hqd_iq_timer; /* ordinal150 */
3369 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
3370 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
3371 uint32_t cp_hqd_dma_offload; /* ordinal153 */
3372 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
3373 uint32_t cp_hqd_msg_type; /* ordinal155 */
3374 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
3375 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
3376 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
3377 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
3378 uint32_t cp_hqd_hq_status0; /* ordinal160 */
3379 uint32_t cp_hqd_hq_control0; /* ordinal161 */
3380 uint32_t cp_mqd_control; /* ordinal162 */
3381 uint32_t cp_hqd_hq_status1; /* ordinal163 */
3382 uint32_t cp_hqd_hq_control1; /* ordinal164 */
3383 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
3384 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
3385 uint32_t cp_hqd_eop_control; /* ordinal167 */
3386 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
3387 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
3388 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
3389 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
3390 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
3391 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
3392 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
3393 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
3394 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
3395 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
3396 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
3397 uint32_t cp_hqd_error; /* ordinal179 */
3398 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
3399 uint32_t cp_hqd_eop_dones; /* ordinal181 */
3400 uint32_t reserved46; /* ordinal182 */
3401 uint32_t reserved47; /* ordinal183 */
3402 uint32_t reserved48; /* ordinal184 */
3403 uint32_t reserved49; /* ordinal185 */
3404 uint32_t reserved50; /* ordinal186 */
3405 uint32_t reserved51; /* ordinal187 */
3406 uint32_t reserved52; /* ordinal188 */
3407 uint32_t reserved53; /* ordinal189 */
3408 uint32_t reserved54; /* ordinal190 */
3409 uint32_t reserved55; /* ordinal191 */
3410 uint32_t iqtimer_pkt_header; /* ordinal192 */
3411 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
3412 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
3413 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
3414 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
3415 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
3416 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
3417 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
3418 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
3419 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
3420 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
3421 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
3422 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
3423 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
3424 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
3425 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
3426 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
3427 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
3428 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
3429 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
3430 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
3431 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
3432 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
3433 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
3434 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
3435 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
3436 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
3437 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
3438 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
3439 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
3440 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
3441 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
3442 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
3443 uint32_t reserved56; /* ordinal225 */
3444 uint32_t reserved57; /* ordinal226 */
3445 uint32_t reserved58; /* ordinal227 */
3446 uint32_t set_resources_header; /* ordinal228 */
3447 uint32_t set_resources_dw1; /* ordinal229 */
3448 uint32_t set_resources_dw2; /* ordinal230 */
3449 uint32_t set_resources_dw3; /* ordinal231 */
3450 uint32_t set_resources_dw4; /* ordinal232 */
3451 uint32_t set_resources_dw5; /* ordinal233 */
3452 uint32_t set_resources_dw6; /* ordinal234 */
3453 uint32_t set_resources_dw7; /* ordinal235 */
3454 uint32_t reserved59; /* ordinal236 */
3455 uint32_t reserved60; /* ordinal237 */
3456 uint32_t reserved61; /* ordinal238 */
3457 uint32_t reserved62; /* ordinal239 */
3458 uint32_t reserved63; /* ordinal240 */
3459 uint32_t reserved64; /* ordinal241 */
3460 uint32_t reserved65; /* ordinal242 */
3461 uint32_t reserved66; /* ordinal243 */
3462 uint32_t reserved67; /* ordinal244 */
3463 uint32_t reserved68; /* ordinal245 */
3464 uint32_t reserved69; /* ordinal246 */
3465 uint32_t reserved70; /* ordinal247 */
3466 uint32_t reserved71; /* ordinal248 */
3467 uint32_t reserved72; /* ordinal249 */
3468 uint32_t reserved73; /* ordinal250 */
3469 uint32_t reserved74; /* ordinal251 */
3470 uint32_t reserved75; /* ordinal252 */
3471 uint32_t reserved76; /* ordinal253 */
3472 uint32_t reserved77; /* ordinal254 */
3473 uint32_t reserved78; /* ordinal255 */
3474
3475 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
3476 };
3477
3478 static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
3479 {
3480 int i, r;
3481
3482 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3483 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3484
3485 if (ring->mqd_obj) {
3486 r = amdgpu_bo_reserve(ring->mqd_obj, false);
3487 if (unlikely(r != 0))
3488 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
3489
3490 amdgpu_bo_unpin(ring->mqd_obj);
3491 amdgpu_bo_unreserve(ring->mqd_obj);
3492
3493 amdgpu_bo_unref(&ring->mqd_obj);
3494 ring->mqd_obj = NULL;
3495 }
3496 }
3497 }
3498
/*
 * gfx_v8_0_cp_compute_resume - bring up the compute (MEC) queues
 *
 * First programs the per-pipe EOP buffers, then for every compute ring
 * builds an MQD (memory queue descriptor) in a GTT buffer object and
 * mirrors it into the HQD registers so the hardware can schedule the
 * queue.  Finally enables the MEC and ring-tests each compute ring.
 *
 * All HQD register accesses are banked per me/pipe/queue, so they must
 * happen between vi_srbm_select() calls under srbm_mutex.
 *
 * Returns 0 on success or a negative error code on BO failure.
 */
static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
{
	int r, i, j;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct vi_mqd *mqd;

	/* init the pipes */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
		/* pipes 0-3 live on ME 1, pipes 4-7 on ME 2 */
		int me = (i < 4) ? 1 : 2;
		int pipe = (i < 4) ? i : (i - 4);

		/* EOP base registers hold a 256-byte-aligned address, hence >>= 8 */
		eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
		eop_gpu_addr >>= 8;

		vi_srbm_select(adev, me, pipe, 0, 0);

		/* write the EOP addr */
		WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
		WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));

		/* set the VMID assigned */
		WREG32(mmCP_HQD_VMID, 0);

		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
		tmp = RREG32(mmCP_HQD_EOP_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
				    (order_base_2(MEC_HPD_SIZE / 4) - 1));
		WREG32(mmCP_HQD_EOP_CONTROL, tmp);
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);

	/* init the queues */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		/* MQD bo is created lazily and persists across suspend/resume */
		if (ring->mqd_obj == NULL) {
			r = amdgpu_bo_create(adev,
					     sizeof(struct vi_mqd),
					     PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &ring->mqd_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(ring->mqd_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}
		r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
			gfx_v8_0_cp_compute_fini(adev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct vi_mqd));

		/* header/static fields: magic values come from the MQD spec
		 * for this packet format (CP parses these on queue map) */
		mqd = (struct vi_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->compute_pipelinestat_enable = 0x00000001;
		mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
		mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
		mqd->compute_misc_reserved = 0x00000003;

		mutex_lock(&adev->srbm_mutex);
		vi_srbm_select(adev, ring->me,
			       ring->pipe,
			       ring->queue, 0);

		/* disable wptr polling */
		tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
		tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
		WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

		/* capture the EOP base programmed in the pipe-init loop above */
		mqd->cp_hqd_eop_base_addr_lo =
			RREG32(mmCP_HQD_EOP_BASE_ADDR);
		mqd->cp_hqd_eop_base_addr_hi =
			RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);

		/* enable doorbell? */
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell) {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
		} else {
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
		mqd->cp_hqd_pq_doorbell_control = tmp;

		/* disable the queue if it's active: request a dequeue and poll
		 * CP_HQD_ACTIVE until the hardware drains or we time out */
		mqd->cp_hqd_dequeue_request = 0;
		mqd->cp_hqd_pq_rptr = 0;
		mqd->cp_hqd_pq_wptr= 0;
		if (RREG32(mmCP_HQD_ACTIVE) & 1) {
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < adev->usec_timeout; j++) {
				if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
			WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
			WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
		mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
		WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

		/* set MQD vmid to 0 */
		tmp = RREG32(mmCP_MQD_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
		WREG32(mmCP_MQD_CONTROL, tmp);
		mqd->cp_mqd_control = tmp;

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = ring->gpu_addr >> 8;
		mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
		mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
		WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		tmp = RREG32(mmCP_HQD_PQ_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
				    (order_base_2(ring->ring_size / 4) - 1));
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			       ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
		WREG32(mmCP_HQD_PQ_CONTROL, tmp);
		mqd->cp_hqd_pq_control = tmp;

		/* set the wb address whether it's enabled or not */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
		mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->cp_hqd_pq_rptr_report_addr_lo);
		WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->cp_hqd_pq_rptr_report_addr_hi);

		/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
		wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
		mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
		WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->cp_hqd_pq_wptr_poll_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			/* the doorbell aperture range only needs (re)programming
			 * on these ASICs; others keep the POR/BIOS values */
			if ((adev->asic_type == CHIP_CARRIZO) ||
			    (adev->asic_type == CHIP_FIJI) ||
			    (adev->asic_type == CHIP_STONEY)) {
				WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
				       AMDGPU_DOORBELL_KIQ << 2);
				WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
				       AMDGPU_DOORBELL_MEC_RING7 << 2);
			}
			tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					    DOORBELL_OFFSET, ring->doorbell_index);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
			mqd->cp_hqd_pq_doorbell_control = tmp;

		} else {
			mqd->cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->cp_hqd_pq_doorbell_control);

		/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		ring->wptr = 0;
		mqd->cp_hqd_pq_wptr = ring->wptr;
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
		mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->cp_hqd_vmid = 0;
		WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

		tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
		WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
		mqd->cp_hqd_persistent_state = tmp;
		if (adev->asic_type == CHIP_STONEY) {
			/* Stoney additionally needs GENERIC2 interrupts on ME1 pipe3 */
			tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
			tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
			WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
		}

		/* activate the queue */
		mqd->cp_hqd_active = 1;
		WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		amdgpu_bo_kunmap(ring->mqd_obj);
		amdgpu_bo_unreserve(ring->mqd_obj);
	}

	if (use_doorbell) {
		/* global doorbell enable, after all queues are programmed */
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	gfx_v8_0_cp_compute_enable(adev, true);

	/* smoke-test every compute ring; failures just mark the ring unusable */
	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];

		ring->ready = true;
		r = amdgpu_ring_test_ring(ring);
		if (r)
			ring->ready = false;
	}

	return 0;
}
3753
3754 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3755 {
3756 int r;
3757
3758 if (!(adev->flags & AMD_IS_APU))
3759 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3760
3761 if (!adev->pp_enabled) {
3762 if (!adev->firmware.smu_load) {
3763 /* legacy firmware loading */
3764 r = gfx_v8_0_cp_gfx_load_microcode(adev);
3765 if (r)
3766 return r;
3767
3768 r = gfx_v8_0_cp_compute_load_microcode(adev);
3769 if (r)
3770 return r;
3771 } else {
3772 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3773 AMDGPU_UCODE_ID_CP_CE);
3774 if (r)
3775 return -EINVAL;
3776
3777 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3778 AMDGPU_UCODE_ID_CP_PFP);
3779 if (r)
3780 return -EINVAL;
3781
3782 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3783 AMDGPU_UCODE_ID_CP_ME);
3784 if (r)
3785 return -EINVAL;
3786
3787 if (adev->asic_type == CHIP_TOPAZ) {
3788 r = gfx_v8_0_cp_compute_load_microcode(adev);
3789 if (r)
3790 return r;
3791 } else {
3792 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3793 AMDGPU_UCODE_ID_CP_MEC1);
3794 if (r)
3795 return -EINVAL;
3796 }
3797 }
3798 }
3799
3800 r = gfx_v8_0_cp_gfx_resume(adev);
3801 if (r)
3802 return r;
3803
3804 r = gfx_v8_0_cp_compute_resume(adev);
3805 if (r)
3806 return r;
3807
3808 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3809
3810 return 0;
3811 }
3812
/* Gate both command processors (gfx and compute/MEC) together. */
static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
{
	gfx_v8_0_cp_gfx_enable(adev, enable);
	gfx_v8_0_cp_compute_enable(adev, enable);
}
3818
/*
 * gfx_v8_0_hw_init - IP-block hw_init hook
 *
 * Programs the golden register settings, initializes the GPU core, then
 * brings up the RLC followed by the command processors.
 *
 * Returns 0 on success or a negative error code.
 */
static int gfx_v8_0_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int r;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	r = gfx_v8_0_rlc_resume(adev);
	if (r)
		return r;

	/* CP bring-up is the final step; its status is the hook's status */
	return gfx_v8_0_cp_resume(adev);
}
3838
/*
 * gfx_v8_0_hw_fini - IP-block hw_fini hook
 *
 * Disables the privileged register/instruction fault interrupts, halts
 * both command processors and the RLC, then frees the compute MQD
 * buffers.  Always returns 0.
 */
static int gfx_v8_0_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
	gfx_v8_0_cp_enable(adev, false);
	gfx_v8_0_rlc_stop(adev);
	gfx_v8_0_cp_compute_fini(adev);

	return 0;
}
3851
/* IP-block suspend hook: a full hw_fini is sufficient for gfx. */
static int gfx_v8_0_suspend(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_fini(adev);
}
3858
/* IP-block resume hook: re-run the full hw_init sequence. */
static int gfx_v8_0_resume(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	return gfx_v8_0_hw_init(adev);
}
3865
3866 static bool gfx_v8_0_is_idle(void *handle)
3867 {
3868 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3869
3870 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3871 return false;
3872 else
3873 return true;
3874 }
3875
3876 static int gfx_v8_0_wait_for_idle(void *handle)
3877 {
3878 unsigned i;
3879 u32 tmp;
3880 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
3881
3882 for (i = 0; i < adev->usec_timeout; i++) {
3883 /* read MC_STATUS */
3884 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3885
3886 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3887 return 0;
3888 udelay(1);
3889 }
3890 return -ETIMEDOUT;
3891 }
3892
/*
 * gfx_v8_0_print_status - dump GFX 8.x state to the kernel log
 *
 * Prints the main GRBM/CP status registers, tiling and config state,
 * RLC registers, and the per-VMID SH_MEM_* setup.  Called from
 * soft_reset to capture hardware state before and after the reset.
 * Register selection (SE and SRBM banking) is restored before return.
 */
static void gfx_v8_0_print_status(void *handle)
{
	int i;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	dev_info(adev->dev, "GFX 8.x registers\n");
	dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
		RREG32(mmGRBM_STATUS));
	dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
		RREG32(mmGRBM_STATUS2));
	dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE0));
	dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE1));
	dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE2));
	dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
		RREG32(mmGRBM_STATUS_SE3));
	dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
	dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT1));
	dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT2));
	dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(mmCP_STALLED_STAT3));
	dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(mmCP_CPF_BUSY_STAT));
	dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPF_STALLED_STAT1));
	dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
	dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
	dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(mmCP_CPC_STALLED_STAT1));
	dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));

	/* tiling configuration */
	for (i = 0; i < 32; i++) {
		dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
	}
	for (i = 0; i < 16; i++) {
		dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
			 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
	}
	/* per-SE raster config; gfx_v8_0_select_se_sh banks the registers */
	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
		dev_info(adev->dev, " se: %d\n", i);
		gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
		dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG));
		dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
			 RREG32(mmPA_SC_RASTER_CONFIG_1));
	}
	/* restore broadcast (all SEs/SHs) selection */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmGB_ADDR_CONFIG));
	dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
		 RREG32(mmHDP_ADDR_CONFIG));
	dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
		 RREG32(mmDMIF_ADDR_CALC));

	dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
		 RREG32(mmCP_MEQ_THRESHOLDS));
	dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
		 RREG32(mmSX_DEBUG_1));
	dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
		 RREG32(mmTA_CNTL_AUX));
	dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL));
	dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
		 RREG32(mmSQ_CONFIG));
	dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
		 RREG32(mmDB_DEBUG));
	dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
		 RREG32(mmDB_DEBUG2));
	dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
		 RREG32(mmDB_DEBUG3));
	dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
		 RREG32(mmCB_HW_CONTROL));
	dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
		 RREG32(mmSPI_CONFIG_CNTL_1));
	dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
		 RREG32(mmPA_SC_FIFO_SIZE));
	dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
		 RREG32(mmVGT_NUM_INSTANCES));
	dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
		 RREG32(mmCP_PERFMON_CNTL));
	dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
		 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
	dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
		 RREG32(mmVGT_CACHE_INVALIDATION));
	dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
		 RREG32(mmVGT_GS_VERTEX_REUSE));
	dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
		 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
	dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
		 RREG32(mmPA_CL_ENHANCE));
	dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
		 RREG32(mmPA_SC_ENHANCE));

	dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
		 RREG32(mmCP_ME_CNTL));
	dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
		 RREG32(mmCP_MAX_CONTEXT));
	dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
		 RREG32(mmCP_ENDIAN_SWAP));
	dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
		 RREG32(mmCP_DEVICE_ID));

	dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
		 RREG32(mmCP_SEM_WAIT_TIMER));

	dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
		 RREG32(mmCP_RB_WPTR_DELAY));
	dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
		 RREG32(mmCP_RB_VMID));
	dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
		 RREG32(mmCP_RB0_WPTR));
	dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR));
	dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
		 RREG32(mmCP_RB0_RPTR_ADDR_HI));
	dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
		 RREG32(mmCP_RB0_CNTL));
	dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
		 RREG32(mmCP_RB0_BASE));
	dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
		 RREG32(mmCP_RB0_BASE_HI));
	dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
		 RREG32(mmCP_MEC_CNTL));
	dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
		 RREG32(mmCP_CPF_DEBUG));

	dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
		 RREG32(mmSCRATCH_ADDR));
	dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
		 RREG32(mmSCRATCH_UMSK));

	dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
		 RREG32(mmCP_INT_CNTL_RING0));
	dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
		 RREG32(mmRLC_CNTL));
	dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
		 RREG32(mmRLC_CGCG_CGLS_CTRL));
	dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_INIT));
	dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
		 RREG32(mmRLC_LB_CNTR_MAX));
	dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
		 RREG32(mmRLC_LB_INIT_CU_MASK));
	dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
		 RREG32(mmRLC_LB_PARAMS));
	dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
		 RREG32(mmRLC_LB_CNTL));
	dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
		 RREG32(mmRLC_MC_CNTL));
	dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
		 RREG32(mmRLC_UCODE_CNTL));

	/* SH_MEM_* registers are banked per VMID via SRBM */
	mutex_lock(&adev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		vi_srbm_select(adev, 0, 0, 0, i);
		dev_info(adev->dev, " VM %d:\n", i);
		dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
			 RREG32(mmSH_MEM_CONFIG));
		dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_BASE));
		dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
			 RREG32(mmSH_MEM_APE1_LIMIT));
		dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
			 RREG32(mmSH_MEM_BASES));
	}
	vi_srbm_select(adev, 0, 0, 0, 0);
	mutex_unlock(&adev->srbm_mutex);
}
4071
/*
 * gfx_v8_0_soft_reset - IP-block soft_reset hook
 *
 * Inspects GRBM_STATUS/GRBM_STATUS2/SRBM_STATUS to decide which GRBM
 * and SRBM soft-reset bits are needed.  If any are, it stops the RLC
 * and both CPs, stalls the memory controller interface, pulses the
 * reset bits (write, read back, delay, clear), then releases the stall.
 * Returns 0 unconditionally; the actual re-init happens via hw_init.
 */
static int gfx_v8_0_soft_reset(void *handle)
{
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* GRBM_STATUS */
	tmp = RREG32(mmGRBM_STATUS);
	if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
		   GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
		   GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
		   GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
		   GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
		   GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
		/* any busy gfx-pipeline unit -> reset CP and GFX */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
	}

	if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
		/* CP itself stuck -> also reset GRBM via SRBM */
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
	}

	/* GRBM_STATUS2 */
	tmp = RREG32(mmGRBM_STATUS2);
	if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
		grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);

	/* SRBM_STATUS */
	tmp = RREG32(mmSRBM_STATUS);
	if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
		srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
						SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);

	if (grbm_soft_reset || srbm_soft_reset) {
		/* dump state before the reset for debugging */
		gfx_v8_0_print_status((void *)adev);
		/* stop the rlc */
		gfx_v8_0_rlc_stop(adev);

		/* Disable GFX parsing/prefetching */
		gfx_v8_0_cp_gfx_enable(adev, false);

		/* Disable MEC parsing/prefetching */
		gfx_v8_0_cp_compute_enable(adev, false);

		if (grbm_soft_reset || srbm_soft_reset) {
			/* stall gfx traffic into the memory controller while
			 * the reset bits are pulsed */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 1);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 1);
			WREG32(mmGMCON_DEBUG, tmp);

			udelay(50);
		}

		if (grbm_soft_reset) {
			tmp = RREG32(mmGRBM_SOFT_RESET);
			tmp |= grbm_soft_reset;
			dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmGRBM_SOFT_RESET, tmp);
			/* read back to post the write before the delay */
			tmp = RREG32(mmGRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~grbm_soft_reset;
			WREG32(mmGRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmGRBM_SOFT_RESET);
		}

		if (srbm_soft_reset) {
			tmp = RREG32(mmSRBM_SOFT_RESET);
			tmp |= srbm_soft_reset;
			dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
			WREG32(mmSRBM_SOFT_RESET, tmp);
			/* read back to post the write before the delay */
			tmp = RREG32(mmSRBM_SOFT_RESET);

			udelay(50);

			tmp &= ~srbm_soft_reset;
			WREG32(mmSRBM_SOFT_RESET, tmp);
			tmp = RREG32(mmSRBM_SOFT_RESET);
		}

		if (grbm_soft_reset || srbm_soft_reset) {
			/* release the MC stall */
			tmp = RREG32(mmGMCON_DEBUG);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_STALL, 0);
			tmp = REG_SET_FIELD(tmp,
					    GMCON_DEBUG, GFX_CLEAR, 0);
			WREG32(mmGMCON_DEBUG, tmp);
		}

		/* Wait a little for things to settle down */
		udelay(50);
		gfx_v8_0_print_status((void *)adev);
	}
	return 0;
}
4176
4177 /**
4178 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
4179 *
4180 * @adev: amdgpu_device pointer
4181 *
4182 * Fetches a GPU clock counter snapshot.
4183 * Returns the 64 bit clock counter snapshot.
4184 */
4185 uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
4186 {
4187 uint64_t clock;
4188
4189 mutex_lock(&adev->gfx.gpu_clock_mutex);
4190 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
4191 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
4192 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
4193 mutex_unlock(&adev->gfx.gpu_clock_mutex);
4194 return clock;
4195 }
4196
/*
 * gfx_v8_0_ring_emit_gds_switch - emit GDS/GWS/OA programming for a VMID
 *
 * Emits four WRITE_DATA packets that program the per-VMID GDS base/size,
 * GWS allocation and OA mask registers.  Base/size arguments arrive in
 * bytes and are converted to HW allocation units via the AMDGPU_*_SHIFT
 * constants before being written.
 */
static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
					  uint32_t vmid,
					  uint32_t gds_base, uint32_t gds_size,
					  uint32_t gws_base, uint32_t gws_size,
					  uint32_t oa_base, uint32_t oa_size)
{
	/* convert byte quantities into GDS/GWS/OA units */
	gds_base = gds_base >> AMDGPU_GDS_SHIFT;
	gds_size = gds_size >> AMDGPU_GDS_SHIFT;

	gws_base = gws_base >> AMDGPU_GWS_SHIFT;
	gws_size = gws_size >> AMDGPU_GWS_SHIFT;

	oa_base = oa_base >> AMDGPU_OA_SHIFT;
	oa_size = oa_size >> AMDGPU_OA_SHIFT;

	/* GDS Base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_base);

	/* GDS Size */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gds_size);

	/* GWS: size in the upper field, base in the lower */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);

	/* OA: contiguous bitmask of oa_size bits starting at bit oa_base */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
}
4244
4245 static int gfx_v8_0_early_init(void *handle)
4246 {
4247 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4248
4249 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
4250 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
4251 gfx_v8_0_set_ring_funcs(adev);
4252 gfx_v8_0_set_irq_funcs(adev);
4253 gfx_v8_0_set_gds_init(adev);
4254 gfx_v8_0_set_rlc_funcs(adev);
4255
4256 return 0;
4257 }
4258
4259 static int gfx_v8_0_late_init(void *handle)
4260 {
4261 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4262 int r;
4263
4264 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
4265 if (r)
4266 return r;
4267
4268 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
4269 if (r)
4270 return r;
4271
4272 /* requires IBs so do in late init after IB pool is initialized */
4273 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
4274 if (r)
4275 return r;
4276
4277 return 0;
4278 }
4279
/* Powergating is not implemented for gfx v8; the IP-block callback is a
 * no-op stub that always reports success.
 */
static int gfx_v8_0_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	return 0;
}
4285
/*
 * gfx_v8_0_send_serdes_cmd - issue a BPM serdes command to all CUs
 *
 * @reg_addr: BPM register address to target
 * @cmd: BPM command/data value
 *
 * Broadcasts to every SE/SH, selects all CU and non-CU masters, then
 * programs RLC_SERDES_WR_CTRL with the command.  On Stoney the
 * BPM_DATA/REG_ADDR fields are not cleared first (narrower mask set than
 * the other asics) — presumably a hardware difference; confirm against
 * the register spec if touching this.
 */
static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
				     uint32_t reg_addr, uint32_t cmd)
{
	uint32_t data;

	/* 0xffffffff/0xffffffff = broadcast to all SEs and SHs */
	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);

	WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
	WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);

	data = RREG32(mmRLC_SERDES_WR_CTRL);
	if (adev->asic_type == CHIP_STONEY)
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	else
		data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
			  RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
			  RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
			  RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
			  RLC_SERDES_WR_CTRL__POWER_UP_MASK |
			  RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
			  RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
			  RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
			  RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
	/* install command, target register and BPM address (0xff = all) */
	data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
		 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
		 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
		 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));

	WREG32(mmRLC_SERDES_WR_CTRL, data);
}
4326
/* Messages written to RLC_GPR_REG2 for the CZ/Stoney safe-mode handshake */
#define MSG_ENTER_RLC_SAFE_MODE      1
#define MSG_EXIT_RLC_SAFE_MODE       0

/* RLC_GPR_REG2 field layout (not provided by the generated headers):
 * bit 0 = request flag (cleared by the RLC when done), bits 1..4 = message
 */
#define RLC_GPR_REG2__REQ_MASK           0x00000001
#define RLC_GPR_REG2__MESSAGE__SHIFT     0x00000001
#define RLC_GPR_REG2__MESSAGE_MASK       0x0000001e
4333
/*
 * cz_enter_rlc_safe_mode - request RLC safe mode (Carrizo/Stoney flavor)
 *
 * Writes MSG_ENTER_RLC_SAFE_MODE into RLC_GPR_REG2 and polls until the
 * GFX clock/power status bits report ready and the RLC clears the REQ
 * bit.  No-op when the RLC F32 core is disabled or when neither the
 * relevant CG nor PG features are enabled.
 */
static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to hand-shake with if the RLC isn't running */
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the enter-safe-mode request */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing REQ */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
4369
/*
 * cz_exit_rlc_safe_mode - release RLC safe mode (Carrizo/Stoney flavor)
 *
 * Posts MSG_EXIT_RLC_SAFE_MODE via RLC_GPR_REG2 and polls for the RLC to
 * clear the REQ bit.  No-op when the RLC F32 core is disabled.
 *
 * NOTE(review): unlike cz_enter_rlc_safe_mode, the REQ-clear poll below
 * runs even when no request was written (CG/PG flags not set) — confirm
 * this asymmetry is intentional.
 */
static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
		return;

	if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
	    (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
			       AMD_PG_SUPPORT_GFX_DMG))) {
		/* post the exit-safe-mode request */
		data |= RLC_GPR_REG2__REQ_MASK;
		data &= ~RLC_GPR_REG2__MESSAGE_MASK;
		data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
		WREG32(mmRLC_GPR_REG2, data);
		adev->gfx.rlc.in_safe_mode = false;
	}

	/* wait for the RLC to ack by clearing REQ */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
			break;
		udelay(1);
	}
}
4395
/*
 * iceland_enter_rlc_safe_mode - request RLC safe mode via RLC_SAFE_MODE
 *
 * Iceland-family variant of the safe-mode handshake: writes CMD plus
 * message 1 to RLC_SAFE_MODE, then polls GFX clock/power status and the
 * CMD-ack.  No-op if the RLC F32 core is off or CGCG/MGCG is disabled.
 */
static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	/* nothing to hand-shake with if the RLC isn't running */
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		/* post the enter request: CMD set, message = 1 */
		data |= RLC_SAFE_MODE__CMD_MASK;
		data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
		data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
		WREG32(mmRLC_SAFE_MODE, data);

		/* wait until both GFX clock and power report active */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_GPM_STAT) &
			     (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			      RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
			    (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
			     RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
				break;
			udelay(1);
		}

		/* wait for the RLC to ack by clearing CMD */
		for (i = 0; i < adev->usec_timeout; i++) {
			if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
				break;
			udelay(1);
		}
		adev->gfx.rlc.in_safe_mode = true;
	}
}
4429
/*
 * iceland_exit_rlc_safe_mode - release RLC safe mode via RLC_SAFE_MODE
 *
 * Writes CMD with a zero message (exit) when safe mode was previously
 * entered, then polls for the CMD-ack.  No-op if the RLC F32 core is off.
 *
 * NOTE(review): as in cz_exit_rlc_safe_mode, the ack poll runs even when
 * nothing was written — confirm intentional.
 */
static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	u32 data = 0;
	unsigned i;

	data = RREG32(mmRLC_CNTL);
	if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
		return;

	if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->gfx.rlc.in_safe_mode) {
			/* CMD set with MESSAGE cleared = exit request */
			data |= RLC_SAFE_MODE__CMD_MASK;
			data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
			WREG32(mmRLC_SAFE_MODE, data);
			adev->gfx.rlc.in_safe_mode = false;
		}
	}

	/* wait for the RLC to ack by clearing CMD */
	for (i = 0; i < adev->usec_timeout; i++) {
		if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
			break;
		udelay(1);
	}
}
4454
/* Stub enter-safe-mode for asics needing no RLC handshake; only tracks state. */
static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = true;
}
4459
/* Stub exit-safe-mode counterpart; only tracks state. */
static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
{
	adev->gfx.rlc.in_safe_mode = false;
}
4464
/* RLC safe-mode callbacks: Carrizo/Stoney use the RLC_GPR_REG2 handshake */
static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
	.enter_safe_mode = cz_enter_rlc_safe_mode,
	.exit_safe_mode = cz_exit_rlc_safe_mode
};

/* Iceland family uses the RLC_SAFE_MODE register instead */
static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
	.enter_safe_mode = iceland_enter_rlc_safe_mode,
	.exit_safe_mode = iceland_exit_rlc_safe_mode
};

/* no-op variant for asics that don't need the handshake */
static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
	.enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
	.exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
};
4479
/*
 * gfx_v8_0_update_medium_grain_clock_gating - enable/disable MGCG + MGLS
 *
 * Programs medium grain clock gating, memory light sleep (RLC/CP) and
 * CGTS tree-shade gating.  The numbered steps follow a fixed hardware
 * programming order; do not reorder.  Runs entirely under RLC safe mode.
 */
static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, data;

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	/* It is disabled by HW by default */
	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
				/* 1 - RLC memory Light sleep */
				temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
				data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmRLC_MEM_SLP_CNTL, data);
			}

			if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
				/* 2 - CP memory Light sleep */
				temp = data = RREG32(mmCP_MEM_SLP_CNTL);
				data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
				if (temp != data)
					WREG32(mmCP_MEM_SLP_CNTL, data);
			}
		}

		/* 3 - RLC_CGTT_MGCG_OVERRIDE: clear the overrides so gating
		 * can take effect; APUs keep the GRBM override untouched
		 */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		if (adev->flags & AMD_IS_APU)
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
		else
			data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
				  RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);

		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 5 - clear mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
			/* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
			temp = data = RREG32(mmCGTS_SM_CTRL_REG);
			data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
			data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
			data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
			data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
			if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
			    (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
			data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
			data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
			if (temp != data)
				WREG32(mmCGTS_SM_CTRL_REG, data);
		}
		udelay(50);

		/* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	} else {
		/* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
		temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
			 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
		if (temp != data)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);

		/* 2 - disable MGLS in RLC */
		data = RREG32(mmRLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
			data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
			WREG32(mmRLC_MEM_SLP_CNTL, data);
		}

		/* 3 - disable MGLS in CP */
		data = RREG32(mmCP_MEM_SLP_CNTL);
		if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
			data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
			WREG32(mmCP_MEM_SLP_CNTL, data);
		}

		/* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
		temp = data = RREG32(mmCGTS_SM_CTRL_REG);
		data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
			 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
		if (temp != data)
			WREG32(mmCGTS_SM_CTRL_REG, data);

		/* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 6 - set mgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		udelay(50);

		/* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
4592
/*
 * gfx_v8_0_update_coarse_grain_clock_gating - enable/disable CGCG + CGLS
 *
 * Programs coarse grain clock gating and coarse grain light sleep.  The
 * numbered steps follow a fixed hardware programming order; do not
 * reorder.  Runs entirely under RLC safe mode.
 */
static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
						      bool enable)
{
	uint32_t temp, temp1, data, data1;

	temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);

	adev->gfx.rlc.funcs->enter_safe_mode(adev);

	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
		/* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
		 * Cmp_busy/GFX_Idle interrupts
		 */
		gfx_v8_0_enable_gui_idle_interrupt(adev, true);

		/* clear the CGCG override so gating can engage */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 3 - clear cgcg override */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* 4 - write cmd to set CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);

		/* 5 - enable cgcg */
		data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;

		if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
			/* enable cgls*/
			data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;

			temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
			data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;

			if (temp1 != data1)
				WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
		} else {
			data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
		}

		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	} else {
		/* disable cntx_empty_int_enable & GFX Idle interrupt */
		gfx_v8_0_enable_gui_idle_interrupt(adev, false);

		/* TEST CGCG */
		temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
		data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
			  RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
		if (temp1 != data1)
			WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);

		/* read gfx register to wake up cgcg */
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);
		RREG32(mmCB_CGTT_SCLK_CTRL);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Set CGCG Overrride */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);

		/* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
		gfx_v8_0_wait_for_rlc_serdes(adev);

		/* write cmd to Clear CGLS */
		gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);

		/* disable cgcg, cgls should be disabled too. */
		data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
			  RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
		if (temp != data)
			WREG32(mmRLC_CGCG_CGLS_CTRL, data);
	}

	adev->gfx.rlc.funcs->exit_safe_mode(adev);
}
4681 static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
4682 bool enable)
4683 {
4684 if (enable) {
4685 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
4686 * === MGCG + MGLS + TS(CG/LS) ===
4687 */
4688 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
4689 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
4690 } else {
4691 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
4692 * === CGCG + CGLS ===
4693 */
4694 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
4695 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
4696 }
4697 return 0;
4698 }
4699
4700 static int gfx_v8_0_set_clockgating_state(void *handle,
4701 enum amd_clockgating_state state)
4702 {
4703 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4704
4705 switch (adev->asic_type) {
4706 case CHIP_FIJI:
4707 case CHIP_CARRIZO:
4708 case CHIP_STONEY:
4709 gfx_v8_0_update_gfx_clock_gating(adev,
4710 state == AMD_CG_STATE_GATE ? true : false);
4711 break;
4712 default:
4713 break;
4714 }
4715 return 0;
4716 }
4717
4718 static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
4719 {
4720 u32 rptr;
4721
4722 rptr = ring->adev->wb.wb[ring->rptr_offs];
4723
4724 return rptr;
4725 }
4726
4727 static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
4728 {
4729 struct amdgpu_device *adev = ring->adev;
4730 u32 wptr;
4731
4732 if (ring->use_doorbell)
4733 /* XXX check if swapping is necessary on BE */
4734 wptr = ring->adev->wb.wb[ring->wptr_offs];
4735 else
4736 wptr = RREG32(mmCP_RB0_WPTR);
4737
4738 return wptr;
4739 }
4740
/*
 * Publish the GFX ring write pointer: via writeback slot + doorbell when
 * enabled, otherwise by a CP_RB0_WPTR register write followed by a
 * read-back to post the write.
 */
static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		/* XXX check if swapping is necessary on BE */
		adev->wb.wb[ring->wptr_offs] = ring->wptr;
		WDOORBELL32(ring->doorbell_index, ring->wptr);
	} else {
		WREG32(mmCP_RB0_WPTR, ring->wptr);
		/* read back to flush the register write */
		(void)RREG32(mmCP_RB0_WPTR);
	}
}
4754
/*
 * gfx_v8_0_ring_emit_hdp_flush - emit an HDP flush on the ring
 *
 * Emits a WAIT_REG_MEM packet that writes GPU_HDP_FLUSH_REQ and waits for
 * the matching bit in GPU_HDP_FLUSH_DONE.  The per-ring request bit is
 * selected from the ring's ME/pipe for compute, or CP0 for GFX.
 */
static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
{
	u32 ref_and_mask, reg_mem_engine;

	if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
		switch (ring->me) {
		case 1:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
			break;
		case 2:
			ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
			break;
		default:
			/* only MEC1/MEC2 exist; anything else is a bug */
			return;
		}
		reg_mem_engine = 0;
	} else {
		ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
		reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
	}

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 reg_mem_engine));
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, ref_and_mask);
	amdgpu_ring_write(ring, 0x20); /* poll interval */
}
4786
/*
 * gfx_v8_0_ring_emit_hdp_invalidate - emit an HDP cache invalidate
 *
 * Writes 1 to HDP_DEBUG0 via a confirmed WRITE_DATA packet, which
 * invalidates the HDP read cache.
 */
static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
{
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0) |
				 WR_CONFIRM));
	amdgpu_ring_write(ring, mmHDP_DEBUG0);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1);

}
4798
/*
 * gfx_v8_0_ring_emit_ib_gfx - emit an indirect buffer on the GFX ring
 *
 * Updates the next_rptr writeback slot, emits a SWITCH_BUFFER on a
 * context change, then emits the INDIRECT_BUFFER (or
 * INDIRECT_BUFFER_CONST for CE IBs) packet pointing at @ib.
 */
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
				      struct amdgpu_ib *ib)
{
	bool need_ctx_switch = ring->current_ctx != ib->ctx;
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	/* drop the CE preamble IB for the same context */
	if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && !need_ctx_switch)
		return;

	/* account for the SWITCH_BUFFER packet emitted below */
	if (need_ctx_switch)
		next_rptr += 2;

	/* account for the IB packet itself (header + 3 dwords) */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	/* insert SWITCH_BUFFER packet before first IB in the ring frame */
	if (need_ctx_switch) {
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}

	if (ib->flags & AMDGPU_IB_FLAG_CE)
		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	else
		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VM id in the top byte */
	control |= ib->length_dw | (ib->vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4842
/*
 * gfx_v8_0_ring_emit_ib_compute - emit an indirect buffer on a compute ring
 *
 * Compute variant of the IB emit: updates the next_rptr writeback slot,
 * then emits an INDIRECT_BUFFER packet (no CE/context-switch handling).
 */
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
					  struct amdgpu_ib *ib)
{
	u32 header, control = 0;
	u32 next_rptr = ring->wptr + 5;

	control |= INDIRECT_BUFFER_VALID;

	/* account for the IB packet itself (header + 3 dwords) */
	next_rptr += 4;
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
	amdgpu_ring_write(ring, next_rptr);

	header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);

	/* IB length plus the VM id in the top byte */
	control |= ib->length_dw | (ib->vm_id << 24);

	amdgpu_ring_write(ring, header);
	amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	amdgpu_ring_write(ring, control);
}
4871
/*
 * gfx_v8_0_ring_emit_fence_gfx - emit a fence on the GFX ring
 *
 * Emits an EVENT_WRITE_EOP packet that flushes TC/TCL1 caches, writes the
 * 32- or 64-bit @seq to @addr and optionally raises an interrupt,
 * depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* EVENT_WRITE_EOP - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
			  DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));

}
4891
/*
 * gfx_v8_0_ring_emit_pipeline_sync - wait for the ring's last fence
 *
 * Emits a WAIT_REG_MEM that polls the ring's fence memory until it equals
 * the latest sync sequence number.  On GFX rings the wait runs on the PFP
 * and is followed by two SWITCH_BUFFERs to keep the CE in step with the
 * ME across the context switch.
 */
static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
	uint32_t seq = ring->fence_drv.sync_seq;
	uint64_t addr = ring->fence_drv.gpu_addr;

	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
				 WAIT_REG_MEM_FUNCTION(3) | /* equal */
				 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
	amdgpu_ring_write(ring, seq);
	amdgpu_ring_write(ring, 0xffffffff);
	amdgpu_ring_write(ring, 4); /* poll interval */

	if (usepfp) {
		/* synce CE with ME to prevent CE fetch CEIB before context switch done */
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4916
/*
 * gfx_v8_0_ring_emit_vm_flush - emit a VM page-table flush
 *
 * Writes the new page directory address into the per-context base-address
 * register (contexts 0-7 and 8-15 live in separate register banks),
 * requests a TLB invalidate for @vm_id and waits for it to complete.  On
 * GFX rings a PFP_SYNC_ME plus SWITCH_BUFFERs follow to prevent stale PFP
 * prefetches.
 */
static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
					unsigned vm_id, uint64_t pd_addr)
{
	int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);

	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)) |
				 WR_CONFIRM);
	if (vm_id < 8) {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring,
				  (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, 0);
	/* base address register takes the PD address in 4K-page units */
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* bits 0-15 are the VM contexts0-15 */
	/* invalidate the cache */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* ref */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		amdgpu_ring_write(ring, 0x0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		amdgpu_ring_write(ring, 0);
	}
}
4967
/* Fetch a compute ring's read pointer from its GPU writeback slot. */
static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->rptr_offs];
}
4972
/* Fetch a compute ring's write pointer from its GPU writeback slot. */
static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
	return ring->adev->wb.wb[ring->wptr_offs];
}
4977
/* Publish a compute ring's write pointer: writeback slot then doorbell
 * (compute rings always use doorbells).
 */
static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	/* XXX check if swapping is necessary on BE */
	adev->wb.wb[ring->wptr_offs] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4986
/*
 * gfx_v8_0_ring_emit_fence_compute - emit a fence on a compute ring
 *
 * Compute rings use RELEASE_MEM instead of EVENT_WRITE_EOP: flushes
 * TC/TCL1 (with writeback), writes the 32- or 64-bit @seq to @addr and
 * optionally raises an interrupt, depending on @flags.
 */
static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
					     u64 addr, u64 seq,
					     unsigned flags)
{
	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;

	/* RELEASE_MEM - flush caches, send int */
	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EOP_TC_WB_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
	amdgpu_ring_write(ring, addr & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));
	amdgpu_ring_write(ring, upper_32_bits(seq));
}
5007
5008 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
5009 enum amdgpu_interrupt_state state)
5010 {
5011 u32 cp_int_cntl;
5012
5013 switch (state) {
5014 case AMDGPU_IRQ_STATE_DISABLE:
5015 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5016 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5017 TIME_STAMP_INT_ENABLE, 0);
5018 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5019 break;
5020 case AMDGPU_IRQ_STATE_ENABLE:
5021 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5022 cp_int_cntl =
5023 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5024 TIME_STAMP_INT_ENABLE, 1);
5025 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5026 break;
5027 default:
5028 break;
5029 }
5030 }
5031
/*
 * gfx_v8_0_set_compute_eop_interrupt_state - toggle a compute EOP interrupt
 *
 * Sets or clears TIME_STAMP_INT_ENABLE in the per-pipe MEC interrupt
 * control register for ME @me / pipe @pipe.
 */
static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
						     int me, int pipe,
						     enum amdgpu_interrupt_state state)
{
	u32 mec_int_cntl, mec_int_cntl_reg;

	/*
	 * amdgpu controls only pipe 0 of MEC1. That's why this function only
	 * handles the setting of interrupts for this specific pipe. All other
	 * pipes' interrupts are set by amdkfd.
	 */

	if (me == 1) {
		switch (pipe) {
		case 0:
			mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
			break;
		default:
			DRM_DEBUG("invalid pipe %d\n", pipe);
			return;
		}
	} else {
		DRM_DEBUG("invalid me %d\n", me);
		return;
	}

	switch (state) {
	case AMDGPU_IRQ_STATE_DISABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 0);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	case AMDGPU_IRQ_STATE_ENABLE:
		mec_int_cntl = RREG32(mec_int_cntl_reg);
		mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
					     TIME_STAMP_INT_ENABLE, 1);
		WREG32(mec_int_cntl_reg, mec_int_cntl);
		break;
	default:
		break;
	}
}
5075
5076 static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
5077 struct amdgpu_irq_src *source,
5078 unsigned type,
5079 enum amdgpu_interrupt_state state)
5080 {
5081 u32 cp_int_cntl;
5082
5083 switch (state) {
5084 case AMDGPU_IRQ_STATE_DISABLE:
5085 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5086 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5087 PRIV_REG_INT_ENABLE, 0);
5088 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5089 break;
5090 case AMDGPU_IRQ_STATE_ENABLE:
5091 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5092 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5093 PRIV_REG_INT_ENABLE, 1);
5094 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5095 break;
5096 default:
5097 break;
5098 }
5099
5100 return 0;
5101 }
5102
5103 static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
5104 struct amdgpu_irq_src *source,
5105 unsigned type,
5106 enum amdgpu_interrupt_state state)
5107 {
5108 u32 cp_int_cntl;
5109
5110 switch (state) {
5111 case AMDGPU_IRQ_STATE_DISABLE:
5112 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5113 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5114 PRIV_INSTR_INT_ENABLE, 0);
5115 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5116 break;
5117 case AMDGPU_IRQ_STATE_ENABLE:
5118 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
5119 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
5120 PRIV_INSTR_INT_ENABLE, 1);
5121 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
5122 break;
5123 default:
5124 break;
5125 }
5126
5127 return 0;
5128 }
5129
5130 static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
5131 struct amdgpu_irq_src *src,
5132 unsigned type,
5133 enum amdgpu_interrupt_state state)
5134 {
5135 switch (type) {
5136 case AMDGPU_CP_IRQ_GFX_EOP:
5137 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
5138 break;
5139 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
5140 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
5141 break;
5142 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
5143 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
5144 break;
5145 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
5146 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
5147 break;
5148 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
5149 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
5150 break;
5151 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
5152 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
5153 break;
5154 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
5155 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
5156 break;
5157 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
5158 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
5159 break;
5160 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
5161 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
5162 break;
5163 default:
5164 break;
5165 }
5166 return 0;
5167 }
5168
5169 static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
5170 struct amdgpu_irq_src *source,
5171 struct amdgpu_iv_entry *entry)
5172 {
5173 int i;
5174 u8 me_id, pipe_id, queue_id;
5175 struct amdgpu_ring *ring;
5176
5177 DRM_DEBUG("IH: CP EOP\n");
5178 me_id = (entry->ring_id & 0x0c) >> 2;
5179 pipe_id = (entry->ring_id & 0x03) >> 0;
5180 queue_id = (entry->ring_id & 0x70) >> 4;
5181
5182 switch (me_id) {
5183 case 0:
5184 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
5185 break;
5186 case 1:
5187 case 2:
5188 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
5189 ring = &adev->gfx.compute_ring[i];
5190 /* Per-queue interrupt is supported for MEC starting from VI.
5191 * The interrupt can only be enabled/disabled per pipe instead of per queue.
5192 */
5193 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
5194 amdgpu_fence_process(ring);
5195 }
5196 break;
5197 }
5198 return 0;
5199 }
5200
/*
 * Handler for a privileged-register fault raised by the CP: log the
 * violation and queue a full GPU reset via the device's reset_work.
 * Always returns 0 (the amdgpu_irq_src .process callback contract).
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5209
/*
 * Handler for a privileged-instruction fault raised by the CP: log the
 * violation and queue a full GPU reset via the device's reset_work.
 * Always returns 0 (the amdgpu_irq_src .process callback contract).
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
5218
/* IP-block callbacks registered with the amdgpu core for the GFX 8 engine. */
const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
	.early_init = gfx_v8_0_early_init,
	.late_init = gfx_v8_0_late_init,
	.sw_init = gfx_v8_0_sw_init,
	.sw_fini = gfx_v8_0_sw_fini,
	.hw_init = gfx_v8_0_hw_init,
	.hw_fini = gfx_v8_0_hw_fini,
	.suspend = gfx_v8_0_suspend,
	.resume = gfx_v8_0_resume,
	.is_idle = gfx_v8_0_is_idle,
	.wait_for_idle = gfx_v8_0_wait_for_idle,
	.soft_reset = gfx_v8_0_soft_reset,
	.print_status = gfx_v8_0_print_status,
	.set_clockgating_state = gfx_v8_0_set_clockgating_state,
	.set_powergating_state = gfx_v8_0_set_powergating_state,
};
5235
/* Ring operations for the gfx ring (no CS parsing; IBs submitted directly). */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
	.get_rptr = gfx_v8_0_ring_get_rptr_gfx,
	.get_wptr = gfx_v8_0_ring_get_wptr_gfx,
	.set_wptr = gfx_v8_0_ring_set_wptr_gfx,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
	.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5253
/* Ring operations for the compute (MEC) rings; shares the sync/flush/gds
 * emit helpers with the gfx ring but uses compute-specific rptr/wptr,
 * IB and fence paths. */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
	.get_rptr = gfx_v8_0_ring_get_rptr_compute,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.parse_cs = NULL,
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_compute,
	.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
	.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
	.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
5271
5272 static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
5273 {
5274 int i;
5275
5276 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
5277 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
5278
5279 for (i = 0; i < adev->gfx.num_compute_rings; i++)
5280 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
5281 }
5282
/* IRQ source callbacks for CP end-of-pipe interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
	.set = gfx_v8_0_set_eop_interrupt_state,
	.process = gfx_v8_0_eop_irq,
};
5287
/* IRQ source callbacks for privileged-register fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
	.set = gfx_v8_0_set_priv_reg_fault_state,
	.process = gfx_v8_0_priv_reg_irq,
};
5292
/* IRQ source callbacks for privileged-instruction fault interrupts. */
static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
	.set = gfx_v8_0_set_priv_inst_fault_state,
	.process = gfx_v8_0_priv_inst_irq,
};
5297
5298 static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
5299 {
5300 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
5301 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
5302
5303 adev->gfx.priv_reg_irq.num_types = 1;
5304 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
5305
5306 adev->gfx.priv_inst_irq.num_types = 1;
5307 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
5308 }
5309
5310 static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
5311 {
5312 switch (adev->asic_type) {
5313 case CHIP_TOPAZ:
5314 case CHIP_STONEY:
5315 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
5316 break;
5317 case CHIP_CARRIZO:
5318 adev->gfx.rlc.funcs = &cz_rlc_funcs;
5319 break;
5320 default:
5321 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
5322 break;
5323 }
5324 }
5325
5326 static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
5327 {
5328 /* init asci gds info */
5329 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
5330 adev->gds.gws.total_size = 64;
5331 adev->gds.oa.total_size = 16;
5332
5333 if (adev->gds.mem.total_size == 64 * 1024) {
5334 adev->gds.mem.gfx_partition_size = 4096;
5335 adev->gds.mem.cs_partition_size = 4096;
5336
5337 adev->gds.gws.gfx_partition_size = 4;
5338 adev->gds.gws.cs_partition_size = 4;
5339
5340 adev->gds.oa.gfx_partition_size = 4;
5341 adev->gds.oa.cs_partition_size = 1;
5342 } else {
5343 adev->gds.mem.gfx_partition_size = 1024;
5344 adev->gds.mem.cs_partition_size = 1024;
5345
5346 adev->gds.gws.gfx_partition_size = 16;
5347 adev->gds.gws.cs_partition_size = 16;
5348
5349 adev->gds.oa.gfx_partition_size = 4;
5350 adev->gds.oa.cs_partition_size = 4;
5351 }
5352 }
5353
5354 static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
5355 {
5356 u32 data, mask;
5357
5358 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
5359 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
5360
5361 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
5362 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
5363
5364 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
5365
5366 return (~data) & mask;
5367 }
5368
5369 int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
5370 struct amdgpu_cu_info *cu_info)
5371 {
5372 int i, j, k, counter, active_cu_number = 0;
5373 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
5374
5375 if (!adev || !cu_info)
5376 return -EINVAL;
5377
5378 memset(cu_info, 0, sizeof(*cu_info));
5379
5380 mutex_lock(&adev->grbm_idx_mutex);
5381 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
5382 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
5383 mask = 1;
5384 ao_bitmap = 0;
5385 counter = 0;
5386 gfx_v8_0_select_se_sh(adev, i, j);
5387 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
5388 cu_info->bitmap[i][j] = bitmap;
5389
5390 for (k = 0; k < 16; k ++) {
5391 if (bitmap & mask) {
5392 if (counter < 2)
5393 ao_bitmap |= mask;
5394 counter ++;
5395 }
5396 mask <<= 1;
5397 }
5398 active_cu_number += counter;
5399 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
5400 }
5401 }
5402 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
5403 mutex_unlock(&adev->grbm_idx_mutex);
5404
5405 cu_info->number = active_cu_number;
5406 cu_info->ao_cu_mask = ao_cu_mask;
5407
5408 return 0;
5409 }