/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */
#include <linux/firmware.h>
#include "drmP.h"
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "vi.h"
#include "vid.h"
#include "amdgpu_ucode.h"
#include "amdgpu_atombios.h"
#include "atombios_i2c.h"
#include "clearstate_vi.h"

#include "gmc/gmc_8_2_d.h"
#include "gmc/gmc_8_2_sh_mask.h"

#include "oss/oss_3_0_d.h"
#include "oss/oss_3_0_sh_mask.h"

#include "bif/bif_5_0_d.h"
#include "bif/bif_5_0_sh_mask.h"

#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"

#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"

#include "smu/smu_7_1_3_d.h"

#define GFX8_NUM_GFX_RINGS 1
#define GFX8_NUM_COMPUTE_RINGS 8

#define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001
#define POLARIS11_GB_ADDR_CONFIG_GOLDEN 0x22011002
#define TONGA_GB_ADDR_CONFIG_GOLDEN 0x22011003

#define ARRAY_MODE(x) ((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x) ((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x) ((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x) ((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x) ((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)
#define BANK_WIDTH(x) ((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x) ((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x) ((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x) ((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)

#define RLC_CGTT_MGCG_OVERRIDE__CPF_MASK 0x00000001L
#define RLC_CGTT_MGCG_OVERRIDE__RLC_MASK 0x00000002L
#define RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK 0x00000004L
#define RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK 0x00000008L
#define RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK 0x00000010L
#define RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK 0x00000020L

/* BPM SERDES CMD */
#define SET_BPM_SERDES_CMD 1
#define CLE_BPM_SERDES_CMD 0

/* BPM Register Address*/
enum {
	BPM_REG_CGLS_EN = 0,        /* Enable/Disable CGLS */
	BPM_REG_CGLS_ON,            /* ON/OFF CGLS: shall be controlled by RLC FW */
	BPM_REG_CGCG_OVERRIDE,      /* Set/Clear CGCG Override */
	BPM_REG_MGCG_OVERRIDE,      /* Set/Clear MGCG Override */
	BPM_REG_FGCG_OVERRIDE,      /* Set/Clear FGCG Override */
	BPM_REG_FGCG_MAX
};

#define RLC_FormatDirectRegListLength 14

MODULE_FIRMWARE("amdgpu/carrizo_ce.bin");
MODULE_FIRMWARE("amdgpu/carrizo_pfp.bin");
MODULE_FIRMWARE("amdgpu/carrizo_me.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec.bin");
MODULE_FIRMWARE("amdgpu/carrizo_mec2.bin");
MODULE_FIRMWARE("amdgpu/carrizo_rlc.bin");

MODULE_FIRMWARE("amdgpu/stoney_ce.bin");
MODULE_FIRMWARE("amdgpu/stoney_pfp.bin");
MODULE_FIRMWARE("amdgpu/stoney_me.bin");
MODULE_FIRMWARE("amdgpu/stoney_mec.bin");
MODULE_FIRMWARE("amdgpu/stoney_rlc.bin");

MODULE_FIRMWARE("amdgpu/tonga_ce.bin");
MODULE_FIRMWARE("amdgpu/tonga_pfp.bin");
MODULE_FIRMWARE("amdgpu/tonga_me.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec.bin");
MODULE_FIRMWARE("amdgpu/tonga_mec2.bin");
MODULE_FIRMWARE("amdgpu/tonga_rlc.bin");

MODULE_FIRMWARE("amdgpu/topaz_ce.bin");
MODULE_FIRMWARE("amdgpu/topaz_pfp.bin");
MODULE_FIRMWARE("amdgpu/topaz_me.bin");
MODULE_FIRMWARE("amdgpu/topaz_mec.bin");
MODULE_FIRMWARE("amdgpu/topaz_rlc.bin");

MODULE_FIRMWARE("amdgpu/fiji_ce.bin");
MODULE_FIRMWARE("amdgpu/fiji_pfp.bin");
MODULE_FIRMWARE("amdgpu/fiji_me.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec.bin");
MODULE_FIRMWARE("amdgpu/fiji_mec2.bin");
MODULE_FIRMWARE("amdgpu/fiji_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris11_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris11_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris11_me.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris11_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris11_rlc.bin");

MODULE_FIRMWARE("amdgpu/polaris10_ce.bin");
MODULE_FIRMWARE("amdgpu/polaris10_pfp.bin");
MODULE_FIRMWARE("amdgpu/polaris10_me.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec.bin");
MODULE_FIRMWARE("amdgpu/polaris10_mec2.bin");
MODULE_FIRMWARE("amdgpu/polaris10_rlc.bin");

static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
{
	{mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
	{mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
	{mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
	{mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
	{mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
	{mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
	{mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
	{mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
	{mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
	{mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
	{mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
	{mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
	{mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
	{mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
	{mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
	{mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
};

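/*
 * Each "golden" register table below is a flat list of {register, mask, value}
 * triplets. amdgpu_program_register_sequence() consumes them three words at a
 * time: a full 0xffffffff mask writes the value directly, anything else is a
 * read-modify-write of only the masked bits.
 */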
static const u32 golden_settings_tonga_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 tonga_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 tonga_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_polaris11_a11[] =
{
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00006208,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f3,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003210,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris11_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011002,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 golden_settings_polaris10_a11[] =
{
	mmATC_MISC_CG, 0x000c0fc0, 0x000c0200,
	mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
	mmCB_HW_CONTROL_2, 0, 0x0f000000,
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x0000002a,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmRLC_CGCG_CGLS_CTRL_3D, 0xffffffff, 0x0001003c,
	mmSQ_CONFIG, 0x07f80000, 0x07180000,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f7,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 polaris10_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 fiji_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x3a00161a,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002e,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x00000009,
};

static const u32 golden_settings_fiji_a10[] =
{
	mmCB_HW_CONTROL_3, 0x000001ff, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000ff,
	mmVGT_RESET_DEBUG, 0x00000004, 0x00000004,
};

static const u32 fiji_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 golden_settings_iceland_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmDB_DEBUG3, 0xc0000000, 0xc0000000,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
	mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
};

static const u32 iceland_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 iceland_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
};

static const u32 cz_golden_settings_a11[] =
{
	mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmSQ_RANDOM_WAVE_PRI, 0x001fffff, 0x000006fd,
	mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
};

static const u32 cz_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
};

static const u32 cz_mgcg_cgcg_init[] =
{
	mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
	mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
	mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
	mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
	mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
	mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
	mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
	mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
	mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
	mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
	mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
	mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
	mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
};

static const u32 stoney_golden_settings_a11[] =
{
	mmDB_DEBUG2, 0xf00fffff, 0x00000400,
	mmGB_GPU_ID, 0x0000000f, 0x00000000,
	mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
	mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
	mmRLC_CGCG_CGLS_CTRL, 0x00000003, 0x0001003c,
	mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
	mmTCC_CTRL, 0x00100000, 0xf31fff7f,
	mmTCC_EXE_DISABLE, 0x00000002, 0x00000002,
	mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f1,
	mmTCP_CHAN_STEER_LO, 0xffffffff, 0x10101010,
};

static const u32 stoney_golden_common_all[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000000,
	mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
	mmGB_ADDR_CONFIG, 0xffffffff, 0x12010001,
	mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
	mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
	mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF,
};

static const u32 stoney_mgcg_cgcg_init[] =
{
	mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
	mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
	mmCP_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmRLC_MEM_SLP_CNTL, 0xffffffff, 0x00020201,
	mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96940200,
	mmATC_MISC_CG, 0xffffffff, 0x000c0200,
};

static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev);
static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev);
static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev);

static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
{
	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		amdgpu_program_register_sequence(adev,
						 iceland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_iceland_a11,
						 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
		amdgpu_program_register_sequence(adev,
						 iceland_golden_common_all,
						 (const u32)ARRAY_SIZE(iceland_golden_common_all));
		break;
	case CHIP_FIJI:
		amdgpu_program_register_sequence(adev,
						 fiji_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(fiji_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_fiji_a10,
						 (const u32)ARRAY_SIZE(golden_settings_fiji_a10));
		amdgpu_program_register_sequence(adev,
						 fiji_golden_common_all,
						 (const u32)ARRAY_SIZE(fiji_golden_common_all));
		break;

	case CHIP_TONGA:
		amdgpu_program_register_sequence(adev,
						 tonga_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 golden_settings_tonga_a11,
						 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
		amdgpu_program_register_sequence(adev,
						 tonga_golden_common_all,
						 (const u32)ARRAY_SIZE(tonga_golden_common_all));
		break;
	case CHIP_POLARIS11:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris11_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris11_a11));
		amdgpu_program_register_sequence(adev,
						 polaris11_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris11_golden_common_all));
		break;
	case CHIP_POLARIS10:
		amdgpu_program_register_sequence(adev,
						 golden_settings_polaris10_a11,
						 (const u32)ARRAY_SIZE(golden_settings_polaris10_a11));
		amdgpu_program_register_sequence(adev,
						 polaris10_golden_common_all,
						 (const u32)ARRAY_SIZE(polaris10_golden_common_all));
		WREG32_SMC(ixCG_ACLK_CNTL, 0x0000001C);
		if (adev->pdev->revision == 0xc7) {
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1E, 0xDD);
			amdgpu_atombios_i2c_channel_trans(adev, 0x10, 0x96, 0x1F, 0xD0);
		}
		break;
	case CHIP_CARRIZO:
		amdgpu_program_register_sequence(adev,
						 cz_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 cz_golden_settings_a11,
						 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 cz_golden_common_all,
						 (const u32)ARRAY_SIZE(cz_golden_common_all));
		break;
	case CHIP_STONEY:
		amdgpu_program_register_sequence(adev,
						 stoney_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(stoney_mgcg_cgcg_init));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_settings_a11,
						 (const u32)ARRAY_SIZE(stoney_golden_settings_a11));
		amdgpu_program_register_sequence(adev,
						 stoney_golden_common_all,
						 (const u32)ARRAY_SIZE(stoney_golden_common_all));
		break;
	default:
		break;
	}
}

static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
{
	int i;

	adev->gfx.scratch.num_reg = 7;
	adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
	for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
		adev->gfx.scratch.free[i] = true;
		adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
	}
}

static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r) {
		DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
			  ring->idx, r);
		amdgpu_gfx_scratch_free(adev, scratch);
		return r;
	}
	amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n",
			 ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	struct fence *f = NULL;
	uint32_t scratch;
	uint32_t tmp = 0;
	int r;

	r = amdgpu_gfx_scratch_get(adev, &scratch);
	if (r) {
		DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
		return r;
	}
	WREG32(scratch, 0xCAFEDEAD);
	memset(&ib, 0, sizeof(ib));
	r = amdgpu_ib_get(adev, NULL, 256, &ib);
	if (r) {
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		goto err1;
	}
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;

	r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
	if (r)
		goto err2;

	r = fence_wait(f, false);
	if (r) {
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		goto err2;
	}
	tmp = RREG32(scratch);
	if (tmp == 0xDEADBEEF) {
		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
	} else {
		DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
err2:
	amdgpu_ib_free(adev, &ib, NULL);
	fence_put(f);
err1:
	amdgpu_gfx_scratch_free(adev, scratch);
	return r;
}

static void gfx_v8_0_free_microcode(struct amdgpu_device *adev)
{
	release_firmware(adev->gfx.pfp_fw);
	adev->gfx.pfp_fw = NULL;
	release_firmware(adev->gfx.me_fw);
	adev->gfx.me_fw = NULL;
	release_firmware(adev->gfx.ce_fw);
	adev->gfx.ce_fw = NULL;
	release_firmware(adev->gfx.rlc_fw);
	adev->gfx.rlc_fw = NULL;
	release_firmware(adev->gfx.mec_fw);
	adev->gfx.mec_fw = NULL;
	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ))
		release_firmware(adev->gfx.mec2_fw);
	adev->gfx.mec2_fw = NULL;

	kfree(adev->gfx.rlc.register_list_format);
}

static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;
	const struct gfx_firmware_header_v1_0 *cp_hdr;
	const struct rlc_firmware_header_v2_0 *rlc_hdr;
	unsigned int *tmp = NULL, i;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	case CHIP_TONGA:
		chip_name = "tonga";
		break;
	case CHIP_CARRIZO:
		chip_name = "carrizo";
		break;
	case CHIP_FIJI:
		chip_name = "fiji";
		break;
	case CHIP_POLARIS11:
		chip_name = "polaris11";
		break;
	case CHIP_POLARIS10:
		chip_name = "polaris10";
		break;
	case CHIP_STONEY:
		chip_name = "stoney";
		break;
	default:
		BUG();
	}

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
	err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
	adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name);
	err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.me_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
	adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name);
	err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.ce_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
	rlc_hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
	adev->gfx.rlc_fw_version = le32_to_cpu(rlc_hdr->header.ucode_version);
	adev->gfx.rlc_feature_version = le32_to_cpu(rlc_hdr->ucode_feature_version);

	adev->gfx.rlc.save_and_restore_offset =
			le32_to_cpu(rlc_hdr->save_and_restore_offset);
	adev->gfx.rlc.clear_state_descriptor_offset =
			le32_to_cpu(rlc_hdr->clear_state_descriptor_offset);
	adev->gfx.rlc.avail_scratch_ram_locations =
			le32_to_cpu(rlc_hdr->avail_scratch_ram_locations);
	adev->gfx.rlc.reg_restore_list_size =
			le32_to_cpu(rlc_hdr->reg_restore_list_size);
	adev->gfx.rlc.reg_list_format_start =
			le32_to_cpu(rlc_hdr->reg_list_format_start);
	adev->gfx.rlc.reg_list_format_separate_start =
			le32_to_cpu(rlc_hdr->reg_list_format_separate_start);
	adev->gfx.rlc.starting_offsets_start =
			le32_to_cpu(rlc_hdr->starting_offsets_start);
	adev->gfx.rlc.reg_list_format_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_format_size_bytes);
	adev->gfx.rlc.reg_list_size_bytes =
			le32_to_cpu(rlc_hdr->reg_list_size_bytes);

	adev->gfx.rlc.register_list_format =
			kmalloc(adev->gfx.rlc.reg_list_format_size_bytes +
				adev->gfx.rlc.reg_list_size_bytes, GFP_KERNEL);

	if (!adev->gfx.rlc.register_list_format) {
		err = -ENOMEM;
		goto out;
	}

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++)
		adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]);

	adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i;

	tmp = (unsigned int *)((uintptr_t)rlc_hdr +
			le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes));
	for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++)
		adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]);

	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name);
	err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
	if (err)
		goto out;
	err = amdgpu_ucode_validate(adev->gfx.mec_fw);
	if (err)
		goto out;
	cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
	adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
	adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);

	if ((adev->asic_type != CHIP_STONEY) &&
	    (adev->asic_type != CHIP_TOPAZ)) {
		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name);
		err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
		if (!err) {
			err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
			if (err)
				goto out;
			cp_hdr = (const struct gfx_firmware_header_v1_0 *)
				adev->gfx.mec2_fw->data;
			adev->gfx.mec2_fw_version =
				le32_to_cpu(cp_hdr->header.ucode_version);
			adev->gfx.mec2_feature_version =
				le32_to_cpu(cp_hdr->ucode_feature_version);
		} else {
			err = 0;
			adev->gfx.mec2_fw = NULL;
		}
	}

	if (adev->firmware.smu_load) {
		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
		info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
		info->fw = adev->gfx.pfp_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
		info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
		info->fw = adev->gfx.me_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
		info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
		info->fw = adev->gfx.ce_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
		info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
		info->fw = adev->gfx.rlc_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
		info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
		info->fw = adev->gfx.mec_fw;
		header = (const struct common_firmware_header *)info->fw->data;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);

		if (adev->gfx.mec2_fw) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
			info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
			info->fw = adev->gfx.mec2_fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}

	}

out:
	if (err) {
		dev_err(adev->dev,
			"gfx8: Failed to load firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->gfx.pfp_fw);
		adev->gfx.pfp_fw = NULL;
		release_firmware(adev->gfx.me_fw);
		adev->gfx.me_fw = NULL;
		release_firmware(adev->gfx.ce_fw);
		adev->gfx.ce_fw = NULL;
		release_firmware(adev->gfx.rlc_fw);
		adev->gfx.rlc_fw = NULL;
		release_firmware(adev->gfx.mec_fw);
		adev->gfx.mec_fw = NULL;
		release_firmware(adev->gfx.mec2_fw);
		adev->gfx.mec2_fw = NULL;
	}
	return err;
}

static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev,
				    volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (adev->gfx.rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index -
						PACKET3_SET_CONTEXT_REG_START);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(mmPA_SC_RASTER_CONFIG -
			PACKET3_SET_CONTEXT_REG_START);
	switch (adev->asic_type) {
	case CHIP_TONGA:
	case CHIP_POLARIS10:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x0000002A);
		break;
	case CHIP_POLARIS11:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_FIJI:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	case CHIP_TOPAZ:
	case CHIP_CARRIZO:
		buffer[count++] = cpu_to_le32(0x00000002);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_STONEY:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}

static void cz_init_cp_jump_table(struct amdgpu_device *adev)
{
	const __le32 *fw_data;
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	if (adev->asic_type == CHIP_CARRIZO)
		max_me = 5;

	/* write the cp table buffer */
	dst_ptr = adev->gfx.rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (me == 0) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.ce_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 1) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.pfp_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 2) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.me_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 3) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		} else if (me == 4) {
			const struct gfx_firmware_header_v1_0 *hdr =
				(const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
			fw_data = (const __le32 *)
				(adev->gfx.mec2_fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			table_offset = le32_to_cpu(hdr->jt_offset);
			table_size = le32_to_cpu(hdr->jt_size);
		}

		for (i = 0; i < table_size; i++) {
			dst_ptr[bo_offset + i] =
				cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
		}

		bo_offset += table_size;
	}
}

static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev)
{
	int r;

	/* clear state block */
	if (adev->gfx.rlc.clear_state_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC c bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);

		amdgpu_bo_unref(&adev->gfx.rlc.clear_state_obj);
		adev->gfx.rlc.clear_state_obj = NULL;
	}

	/* jump table block */
	if (adev->gfx.rlc.cp_table_obj) {
		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);

		amdgpu_bo_unref(&adev->gfx.rlc.cp_table_obj);
		adev->gfx.rlc.cp_table_obj = NULL;
	}
}

static int gfx_v8_0_rlc_init(struct amdgpu_device *adev)
{
	volatile u32 *dst_ptr;
	u32 dws;
	const struct cs_section_def *cs_data;
	int r;

	adev->gfx.rlc.cs_data = vi_cs_data;

	cs_data = adev->gfx.rlc.cs_data;

	if (cs_data) {
		/* clear state block */
		adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev);

		if (adev->gfx.rlc.clear_state_obj == NULL) {
			r = amdgpu_bo_create(adev, dws * 4, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.clear_state_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r);
				gfx_v8_0_rlc_fini(adev);
				return r;
			}
		}
		r = amdgpu_bo_reserve(adev->gfx.rlc.clear_state_obj, false);
		if (unlikely(r != 0)) {
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.clear_state_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.clear_state_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
			dev_warn(adev->dev, "(%d) pin RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}

		r = amdgpu_bo_kmap(adev->gfx.rlc.clear_state_obj, (void **)&adev->gfx.rlc.cs_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC c bo failed\n", r);
			gfx_v8_0_rlc_fini(adev);
			return r;
		}
		/* set up the cs buffer */
		dst_ptr = adev->gfx.rlc.cs_ptr;
		gfx_v8_0_get_csb_buffer(adev, dst_ptr);
		amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj);
	}

	if ((adev->asic_type == CHIP_CARRIZO) ||
	    (adev->asic_type == CHIP_STONEY)) {
		adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */
		if (adev->gfx.rlc.cp_table_obj == NULL) {
			r = amdgpu_bo_create(adev, adev->gfx.rlc.cp_table_size, PAGE_SIZE, true,
					     AMDGPU_GEM_DOMAIN_VRAM,
					     AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
					     NULL, NULL,
					     &adev->gfx.rlc.cp_table_obj);
			if (r) {
				dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r);
				return r;
			}
		}

		r = amdgpu_bo_reserve(adev->gfx.rlc.cp_table_obj, false);
		if (unlikely(r != 0)) {
			dev_warn(adev->dev, "(%d) reserve RLC cp table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_pin(adev->gfx.rlc.cp_table_obj, AMDGPU_GEM_DOMAIN_VRAM,
				  &adev->gfx.rlc.cp_table_gpu_addr);
		if (r) {
			amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);
			dev_warn(adev->dev, "(%d) pin RLC cp_table bo failed\n", r);
			return r;
		}
		r = amdgpu_bo_kmap(adev->gfx.rlc.cp_table_obj, (void **)&adev->gfx.rlc.cp_table_ptr);
		if (r) {
			dev_warn(adev->dev, "(%d) map RLC cp table bo failed\n", r);
			return r;
		}

		cz_init_cp_jump_table(adev);

		amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj);
		amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj);

	}

	return 0;
}

static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
{
	int r;

	if (adev->gfx.mec.hpd_eop_obj) {
		r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
		amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

		amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
		adev->gfx.mec.hpd_eop_obj = NULL;
	}
}

#define MEC_HPD_SIZE 2048

static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
{
	int r;
	u32 *hpd;

	/*
	 * we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	adev->gfx.mec.num_mec = 1;
	adev->gfx.mec.num_pipe = 1;
	adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;

	if (adev->gfx.mec.hpd_eop_obj == NULL) {
		r = amdgpu_bo_create(adev,
				     adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &adev->gfx.mec.hpd_eop_obj);
		if (r) {
			dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
			  &adev->gfx.mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}
	r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
		gfx_v8_0_mec_fini(adev);
		return r;
	}

	memset(hpd, 0, adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);

	amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
	amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);

	return 0;
}

1414
ccba7691
AD
1415static const u32 vgpr_init_compute_shader[] =
1416{
1417 0x7e000209, 0x7e020208,
1418 0x7e040207, 0x7e060206,
1419 0x7e080205, 0x7e0a0204,
1420 0x7e0c0203, 0x7e0e0202,
1421 0x7e100201, 0x7e120200,
1422 0x7e140209, 0x7e160208,
1423 0x7e180207, 0x7e1a0206,
1424 0x7e1c0205, 0x7e1e0204,
1425 0x7e200203, 0x7e220202,
1426 0x7e240201, 0x7e260200,
1427 0x7e280209, 0x7e2a0208,
1428 0x7e2c0207, 0x7e2e0206,
1429 0x7e300205, 0x7e320204,
1430 0x7e340203, 0x7e360202,
1431 0x7e380201, 0x7e3a0200,
1432 0x7e3c0209, 0x7e3e0208,
1433 0x7e400207, 0x7e420206,
1434 0x7e440205, 0x7e460204,
1435 0x7e480203, 0x7e4a0202,
1436 0x7e4c0201, 0x7e4e0200,
1437 0x7e500209, 0x7e520208,
1438 0x7e540207, 0x7e560206,
1439 0x7e580205, 0x7e5a0204,
1440 0x7e5c0203, 0x7e5e0202,
1441 0x7e600201, 0x7e620200,
1442 0x7e640209, 0x7e660208,
1443 0x7e680207, 0x7e6a0206,
1444 0x7e6c0205, 0x7e6e0204,
1445 0x7e700203, 0x7e720202,
1446 0x7e740201, 0x7e760200,
1447 0x7e780209, 0x7e7a0208,
1448 0x7e7c0207, 0x7e7e0206,
1449 0xbf8a0000, 0xbf810000,
1450};
1451
1452static const u32 sgpr_init_compute_shader[] =
1453{
1454 0xbe8a0100, 0xbe8c0102,
1455 0xbe8e0104, 0xbe900106,
1456 0xbe920108, 0xbe940100,
1457 0xbe960102, 0xbe980104,
1458 0xbe9a0106, 0xbe9c0108,
1459 0xbe9e0100, 0xbea00102,
1460 0xbea20104, 0xbea40106,
1461 0xbea60108, 0xbea80100,
1462 0xbeaa0102, 0xbeac0104,
1463 0xbeae0106, 0xbeb00108,
1464 0xbeb20100, 0xbeb40102,
1465 0xbeb60104, 0xbeb80106,
1466 0xbeba0108, 0xbebc0100,
1467 0xbebe0102, 0xbec00104,
1468 0xbec20106, 0xbec40108,
1469 0xbec60100, 0xbec80102,
1470 0xbee60004, 0xbee70005,
1471 0xbeea0006, 0xbeeb0007,
1472 0xbee80008, 0xbee90009,
1473 0xbefc0000, 0xbf8a0000,
1474 0xbf810000, 0x00000000,
1475};
1476
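/*
 * The *_init_regs arrays below are {register offset, value} pairs; the EDC
 * workaround walks them two entries at a time and emits one SET_SH_REG
 * packet per pair before each dispatch.
 */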
1477static const u32 vgpr_init_regs[] =
1478{
1479 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xffffffff,
1480 mmCOMPUTE_RESOURCE_LIMITS, 0,
1481 mmCOMPUTE_NUM_THREAD_X, 256*4,
1482 mmCOMPUTE_NUM_THREAD_Y, 1,
1483 mmCOMPUTE_NUM_THREAD_Z, 1,
1484 mmCOMPUTE_PGM_RSRC2, 20,
1485 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1486 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1487 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1488 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1489 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1490 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1491 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1492 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1493 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1494 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1495};
1496
1497static const u32 sgpr1_init_regs[] =
1498{
1499 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0x0f,
1500 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1501 mmCOMPUTE_NUM_THREAD_X, 256*5,
1502 mmCOMPUTE_NUM_THREAD_Y, 1,
1503 mmCOMPUTE_NUM_THREAD_Z, 1,
1504 mmCOMPUTE_PGM_RSRC2, 20,
1505 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1506 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1507 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1508 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1509 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1510 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1511 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1512 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1513 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1514 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1515};
1516
1517static const u32 sgpr2_init_regs[] =
1518{
1519 mmCOMPUTE_STATIC_THREAD_MGMT_SE0, 0xf0,
1520 mmCOMPUTE_RESOURCE_LIMITS, 0x1000000,
1521 mmCOMPUTE_NUM_THREAD_X, 256*5,
1522 mmCOMPUTE_NUM_THREAD_Y, 1,
1523 mmCOMPUTE_NUM_THREAD_Z, 1,
1524 mmCOMPUTE_PGM_RSRC2, 20,
1525 mmCOMPUTE_USER_DATA_0, 0xedcedc00,
1526 mmCOMPUTE_USER_DATA_1, 0xedcedc01,
1527 mmCOMPUTE_USER_DATA_2, 0xedcedc02,
1528 mmCOMPUTE_USER_DATA_3, 0xedcedc03,
1529 mmCOMPUTE_USER_DATA_4, 0xedcedc04,
1530 mmCOMPUTE_USER_DATA_5, 0xedcedc05,
1531 mmCOMPUTE_USER_DATA_6, 0xedcedc06,
1532 mmCOMPUTE_USER_DATA_7, 0xedcedc07,
1533 mmCOMPUTE_USER_DATA_8, 0xedcedc08,
1534 mmCOMPUTE_USER_DATA_9, 0xedcedc09,
1535};
1536
1537static const u32 sec_ded_counter_registers[] =
1538{
1539 mmCPC_EDC_ATC_CNT,
1540 mmCPC_EDC_SCRATCH_CNT,
1541 mmCPC_EDC_UCODE_CNT,
1542 mmCPF_EDC_ATC_CNT,
1543 mmCPF_EDC_ROQ_CNT,
1544 mmCPF_EDC_TAG_CNT,
1545 mmCPG_EDC_ATC_CNT,
1546 mmCPG_EDC_DMA_CNT,
1547 mmCPG_EDC_TAG_CNT,
1548 mmDC_EDC_CSINVOC_CNT,
1549 mmDC_EDC_RESTORE_CNT,
1550 mmDC_EDC_STATE_CNT,
1551 mmGDS_EDC_CNT,
1552 mmGDS_EDC_GRBM_CNT,
1553 mmGDS_EDC_OA_DED,
1554 mmSPI_EDC_CNT,
1555 mmSQC_ATC_EDC_GATCL1_CNT,
1556 mmSQC_EDC_CNT,
1557 mmSQ_EDC_DED_CNT,
1558 mmSQ_EDC_INFO,
1559 mmSQ_EDC_SEC_CNT,
1560 mmTCC_EDC_CNT,
1561 mmTCP_ATC_EDC_GATCL1_CNT,
1562 mmTCP_EDC_CNT,
1563 mmTD_EDC_CNT
1564};
1565
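/*
 * EDC GPR workaround (Carrizo only): dispatch the VGPR and SGPR init shaders
 * once on the first compute ring so every GPR is initialized, then enable
 * DED/propagated-FED handling in GB_EDC_MODE and read back the SEC/DED
 * counter registers above to clear them.
 */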
1566static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
1567{
1568 struct amdgpu_ring *ring = &adev->gfx.compute_ring[0];
1569 struct amdgpu_ib ib;
1570 struct fence *f = NULL;
1571 int r, i;
1572 u32 tmp;
1573 unsigned total_size, vgpr_offset, sgpr_offset;
1574 u64 gpu_addr;
1575
1576 /* only supported on CZ */
1577 if (adev->asic_type != CHIP_CARRIZO)
1578 return 0;
1579
1580 /* bail if the compute ring is not ready */
1581 if (!ring->ready)
1582 return 0;
1583
1584 tmp = RREG32(mmGB_EDC_MODE);
1585 WREG32(mmGB_EDC_MODE, 0);
1586
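	/*
	 * Size the IB in dwords: each {reg, value} pair becomes a 3-dword
	 * SET_SH_REG packet, plus 4 dwords for COMPUTE_PGM_LO/HI, 5 for the
	 * DISPATCH_DIRECT packet and 2 for the CS partial flush event;
	 * multiply by 4 for bytes.
	 */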
1587 total_size =
1588 (((ARRAY_SIZE(vgpr_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1589 total_size +=
1590 (((ARRAY_SIZE(sgpr1_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1591 total_size +=
1592 (((ARRAY_SIZE(sgpr2_init_regs) / 2) * 3) + 4 + 5 + 2) * 4;
1593 total_size = ALIGN(total_size, 256);
1594 vgpr_offset = total_size;
1595 total_size += ALIGN(sizeof(vgpr_init_compute_shader), 256);
1596 sgpr_offset = total_size;
1597 total_size += sizeof(sgpr_init_compute_shader);
1598
1599 /* allocate an indirect buffer to put the commands in */
1600 memset(&ib, 0, sizeof(ib));
b07c60c0 1601 r = amdgpu_ib_get(adev, NULL, total_size, &ib);
1602 if (r) {
1603 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
1604 return r;
1605 }
1606
1607 /* load the compute shaders */
1608 for (i = 0; i < ARRAY_SIZE(vgpr_init_compute_shader); i++)
1609 ib.ptr[i + (vgpr_offset / 4)] = vgpr_init_compute_shader[i];
1610
1611 for (i = 0; i < ARRAY_SIZE(sgpr_init_compute_shader); i++)
1612 ib.ptr[i + (sgpr_offset / 4)] = sgpr_init_compute_shader[i];
1613
1614 /* init the ib length to 0 */
1615 ib.length_dw = 0;
1616
1617 /* VGPR */
1618 /* write the register state for the compute dispatch */
1619 for (i = 0; i < ARRAY_SIZE(vgpr_init_regs); i += 2) {
1620 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1621 ib.ptr[ib.length_dw++] = vgpr_init_regs[i] - PACKET3_SET_SH_REG_START;
1622 ib.ptr[ib.length_dw++] = vgpr_init_regs[i + 1];
1623 }
1624 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1625 gpu_addr = (ib.gpu_addr + (u64)vgpr_offset) >> 8;
1626 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1627 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1628 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1629 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1630
1631 /* write dispatch packet */
1632 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1633 ib.ptr[ib.length_dw++] = 8; /* x */
1634 ib.ptr[ib.length_dw++] = 1; /* y */
1635 ib.ptr[ib.length_dw++] = 1; /* z */
1636 ib.ptr[ib.length_dw++] =
1637 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1638
1639 /* write CS partial flush packet */
1640 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1641 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1642
1643 /* SGPR1 */
1644 /* write the register state for the compute dispatch */
1645 for (i = 0; i < ARRAY_SIZE(sgpr1_init_regs); i += 2) {
1646 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1647 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i] - PACKET3_SET_SH_REG_START;
1648 ib.ptr[ib.length_dw++] = sgpr1_init_regs[i + 1];
1649 }
1650 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1651 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1652 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1653 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1654 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1655 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1656
1657 /* write dispatch packet */
1658 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1659 ib.ptr[ib.length_dw++] = 8; /* x */
1660 ib.ptr[ib.length_dw++] = 1; /* y */
1661 ib.ptr[ib.length_dw++] = 1; /* z */
1662 ib.ptr[ib.length_dw++] =
1663 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1664
1665 /* write CS partial flush packet */
1666 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1667 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1668
1669 /* SGPR2 */
1670 /* write the register state for the compute dispatch */
1671 for (i = 0; i < ARRAY_SIZE(sgpr2_init_regs); i += 2) {
1672 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
1673 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i] - PACKET3_SET_SH_REG_START;
1674 ib.ptr[ib.length_dw++] = sgpr2_init_regs[i + 1];
1675 }
1676 /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
1677 gpu_addr = (ib.gpu_addr + (u64)sgpr_offset) >> 8;
1678 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
1679 ib.ptr[ib.length_dw++] = mmCOMPUTE_PGM_LO - PACKET3_SET_SH_REG_START;
1680 ib.ptr[ib.length_dw++] = lower_32_bits(gpu_addr);
1681 ib.ptr[ib.length_dw++] = upper_32_bits(gpu_addr);
1682
1683 /* write dispatch packet */
1684 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
1685 ib.ptr[ib.length_dw++] = 8; /* x */
1686 ib.ptr[ib.length_dw++] = 1; /* y */
1687 ib.ptr[ib.length_dw++] = 1; /* z */
1688 ib.ptr[ib.length_dw++] =
1689 REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
1690
1691 /* write CS partial flush packet */
1692 ib.ptr[ib.length_dw++] = PACKET3(PACKET3_EVENT_WRITE, 0);
1693 ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
1694
1695 	/* schedule the ib on the ring */
c5637837 1696 r = amdgpu_ib_schedule(ring, 1, &ib, NULL, NULL, &f);
1697 if (r) {
1698 DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
1699 goto fail;
1700 }
1701
1702 /* wait for the GPU to finish processing the IB */
1703 r = fence_wait(f, false);
1704 if (r) {
1705 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
1706 goto fail;
1707 }
1708
1709 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, DED_MODE, 2);
1710 tmp = REG_SET_FIELD(tmp, GB_EDC_MODE, PROP_FED, 1);
1711 WREG32(mmGB_EDC_MODE, tmp);
1712
1713 tmp = RREG32(mmCC_GC_EDC_CONFIG);
1714 tmp = REG_SET_FIELD(tmp, CC_GC_EDC_CONFIG, DIS_EDC, 0) | 1;
1715 WREG32(mmCC_GC_EDC_CONFIG, tmp);
1716
1717
1718 /* read back registers to clear the counters */
1719 for (i = 0; i < ARRAY_SIZE(sec_ded_counter_registers); i++)
1720 RREG32(sec_ded_counter_registers[i]);
1721
1722fail:
cc55c45d 1723 amdgpu_ib_free(adev, &ib, NULL);
73cfa5f5 1724 fence_put(f);
1725
1726 return r;
1727}
1728
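/*
 * Fill adev->gfx.config with the per-ASIC shader-engine/CU/cache limits
 * (taken from the VBIOS via amdgpu_atombios_get_gfx_info() on Polaris) and
 * derive gb_addr_config, including the memory row size, from the MC fuse
 * and RAMCFG registers.
 */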
68182d90 1729static int gfx_v8_0_gpu_early_init(struct amdgpu_device *adev)
1730{
1731 u32 gb_addr_config;
1732 u32 mc_shared_chmap, mc_arb_ramcfg;
1733 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1734 u32 tmp;
68182d90 1735 int ret;
1736
1737 switch (adev->asic_type) {
1738 case CHIP_TOPAZ:
1739 adev->gfx.config.max_shader_engines = 1;
1740 adev->gfx.config.max_tile_pipes = 2;
1741 adev->gfx.config.max_cu_per_sh = 6;
1742 adev->gfx.config.max_sh_per_se = 1;
1743 adev->gfx.config.max_backends_per_se = 2;
1744 adev->gfx.config.max_texture_channel_caches = 2;
1745 adev->gfx.config.max_gprs = 256;
1746 adev->gfx.config.max_gs_threads = 32;
1747 adev->gfx.config.max_hw_contexts = 8;
1748
1749 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1750 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1751 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1752 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1753 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1754 break;
1755 case CHIP_FIJI:
1756 adev->gfx.config.max_shader_engines = 4;
1757 adev->gfx.config.max_tile_pipes = 16;
1758 adev->gfx.config.max_cu_per_sh = 16;
1759 adev->gfx.config.max_sh_per_se = 1;
1760 adev->gfx.config.max_backends_per_se = 4;
5f2e816b 1761 adev->gfx.config.max_texture_channel_caches = 16;
1762 adev->gfx.config.max_gprs = 256;
1763 adev->gfx.config.max_gs_threads = 32;
1764 adev->gfx.config.max_hw_contexts = 8;
1765
1766 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1767 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1768 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1769 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1770 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1771 break;
2cc0c0b5 1772 case CHIP_POLARIS11:
1773 ret = amdgpu_atombios_get_gfx_info(adev);
1774 if (ret)
1775 return ret;
1776 adev->gfx.config.max_gprs = 256;
1777 adev->gfx.config.max_gs_threads = 32;
1778 adev->gfx.config.max_hw_contexts = 8;
1779
1780 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1781 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1782 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1783 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
2cc0c0b5 1784 gb_addr_config = POLARIS11_GB_ADDR_CONFIG_GOLDEN;
68182d90 1785 break;
2cc0c0b5 1786 case CHIP_POLARIS10:
1787 ret = amdgpu_atombios_get_gfx_info(adev);
1788 if (ret)
1789 return ret;
1790 adev->gfx.config.max_gprs = 256;
1791 adev->gfx.config.max_gs_threads = 32;
1792 adev->gfx.config.max_hw_contexts = 8;
1793
1794 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1795 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1796 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1797 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1798 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1799 break;
1800 case CHIP_TONGA:
1801 adev->gfx.config.max_shader_engines = 4;
1802 adev->gfx.config.max_tile_pipes = 8;
1803 adev->gfx.config.max_cu_per_sh = 8;
1804 adev->gfx.config.max_sh_per_se = 1;
1805 adev->gfx.config.max_backends_per_se = 2;
1806 adev->gfx.config.max_texture_channel_caches = 8;
1807 adev->gfx.config.max_gprs = 256;
1808 adev->gfx.config.max_gs_threads = 32;
1809 adev->gfx.config.max_hw_contexts = 8;
1810
1811 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1812 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1813 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1814 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1815 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1816 break;
1817 case CHIP_CARRIZO:
1818 adev->gfx.config.max_shader_engines = 1;
1819 adev->gfx.config.max_tile_pipes = 2;
1820 adev->gfx.config.max_sh_per_se = 1;
1821 adev->gfx.config.max_backends_per_se = 2;
1822
1823 switch (adev->pdev->revision) {
1824 case 0xc4:
1825 case 0x84:
1826 case 0xc8:
1827 case 0xcc:
1828 case 0xe1:
1829 case 0xe3:
1830 /* B10 */
1831 adev->gfx.config.max_cu_per_sh = 8;
1832 break;
1833 case 0xc5:
1834 case 0x81:
1835 case 0x85:
1836 case 0xc9:
1837 case 0xcd:
1838 case 0xe2:
1839 case 0xe4:
1840 /* B8 */
1841 adev->gfx.config.max_cu_per_sh = 6;
1842 break;
1843 case 0xc6:
1844 case 0xca:
1845 case 0xce:
b8b339ea 1846 case 0x88:
1847 /* B6 */
1848 adev->gfx.config.max_cu_per_sh = 6;
1849 break;
1850 case 0xc7:
1851 case 0x87:
1852 case 0xcb:
1853 case 0xe5:
1854 case 0x89:
1855 default:
1856 /* B4 */
1857 adev->gfx.config.max_cu_per_sh = 4;
1858 break;
1859 }
1860
1861 adev->gfx.config.max_texture_channel_caches = 2;
1862 adev->gfx.config.max_gprs = 256;
1863 adev->gfx.config.max_gs_threads = 32;
1864 adev->gfx.config.max_hw_contexts = 8;
1865
1866 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1867 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1868 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1869 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1870 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1871 break;
1872 case CHIP_STONEY:
1873 adev->gfx.config.max_shader_engines = 1;
1874 adev->gfx.config.max_tile_pipes = 2;
1875 adev->gfx.config.max_sh_per_se = 1;
1876 adev->gfx.config.max_backends_per_se = 1;
1877
1878 switch (adev->pdev->revision) {
1879 case 0xc0:
1880 case 0xc1:
1881 case 0xc2:
1882 case 0xc4:
1883 case 0xc8:
1884 case 0xc9:
1885 adev->gfx.config.max_cu_per_sh = 3;
1886 break;
1887 case 0xd0:
1888 case 0xd1:
1889 case 0xd2:
1890 default:
1891 adev->gfx.config.max_cu_per_sh = 2;
1892 break;
1893 }
1894
1895 adev->gfx.config.max_texture_channel_caches = 2;
1896 adev->gfx.config.max_gprs = 256;
1897 adev->gfx.config.max_gs_threads = 16;
1898 adev->gfx.config.max_hw_contexts = 8;
1899
1900 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1901 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1902 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1903 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1904 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1905 break;
1906 default:
1907 adev->gfx.config.max_shader_engines = 2;
1908 adev->gfx.config.max_tile_pipes = 4;
1909 adev->gfx.config.max_cu_per_sh = 2;
1910 adev->gfx.config.max_sh_per_se = 1;
1911 adev->gfx.config.max_backends_per_se = 2;
1912 adev->gfx.config.max_texture_channel_caches = 4;
1913 adev->gfx.config.max_gprs = 256;
1914 adev->gfx.config.max_gs_threads = 32;
1915 adev->gfx.config.max_hw_contexts = 8;
1916
1917 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1918 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1919 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1920 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1921 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1922 break;
1923 }
1924
1925 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1926 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1927 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1928
1929 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1930 adev->gfx.config.mem_max_burst_length_bytes = 256;
1931 if (adev->flags & AMD_IS_APU) {
1932 /* Get memory bank mapping mode. */
1933 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1934 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1935 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1936
1937 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1938 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1939 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1940
1941 		/* Validate settings in case only one DIMM is installed. */
1942 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1943 dimm00_addr_map = 0;
1944 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1945 dimm01_addr_map = 0;
1946 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1947 dimm10_addr_map = 0;
1948 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1949 dimm11_addr_map = 0;
1950
1951 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1952 		/* If ROW size(DIMM1) != ROW size(DIMM0), ROW size should be the larger one. */
1953 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1954 adev->gfx.config.mem_row_size_in_kb = 2;
1955 else
1956 adev->gfx.config.mem_row_size_in_kb = 1;
1957 } else {
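		/*
		 * dGPU: the row size in bytes is 4 * 2^(8 + NOOFCOLS);
		 * convert it to KB and cap it at 4 KB.
		 */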
1958 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
1959 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1960 if (adev->gfx.config.mem_row_size_in_kb > 4)
1961 adev->gfx.config.mem_row_size_in_kb = 4;
1962 }
1963
1964 adev->gfx.config.shader_engine_tile_size = 32;
1965 adev->gfx.config.num_gpus = 1;
1966 adev->gfx.config.multi_gpu_tile_size = 64;
1967
1968 /* fix up row size */
1969 switch (adev->gfx.config.mem_row_size_in_kb) {
1970 case 1:
1971 default:
1972 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
1973 break;
1974 case 2:
1975 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
1976 break;
1977 case 4:
1978 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
1979 break;
1980 }
1981 adev->gfx.config.gb_addr_config = gb_addr_config;
1982
1983 return 0;
1984}
1985
5fc3aeeb 1986static int gfx_v8_0_sw_init(void *handle)
1987{
1988 int i, r;
1989 struct amdgpu_ring *ring;
5fc3aeeb 1990 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
1991
1992 /* EOP Event */
1993 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
1994 if (r)
1995 return r;
1996
1997 /* Privileged reg */
1998 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
1999 if (r)
2000 return r;
2001
2002 /* Privileged inst */
2003 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
2004 if (r)
2005 return r;
2006
2007 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
2008
2009 gfx_v8_0_scratch_init(adev);
2010
2011 r = gfx_v8_0_init_microcode(adev);
2012 if (r) {
2013 DRM_ERROR("Failed to load gfx firmware!\n");
2014 return r;
2015 }
2016
2017 r = gfx_v8_0_rlc_init(adev);
2018 if (r) {
2019 DRM_ERROR("Failed to init rlc BOs!\n");
2020 return r;
2021 }
2022
2023 r = gfx_v8_0_mec_init(adev);
2024 if (r) {
2025 DRM_ERROR("Failed to init MEC BOs!\n");
2026 return r;
2027 }
2028
2029 /* set up the gfx ring */
2030 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2031 ring = &adev->gfx.gfx_ring[i];
2032 ring->ring_obj = NULL;
2033 sprintf(ring->name, "gfx");
2034 /* no gfx doorbells on iceland */
2035 if (adev->asic_type != CHIP_TOPAZ) {
2036 ring->use_doorbell = true;
2037 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
2038 }
2039
2800de2e 2040 r = amdgpu_ring_init(adev, ring, 1024,
2041 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2042 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
2043 AMDGPU_RING_TYPE_GFX);
2044 if (r)
2045 return r;
2046 }
2047
2048 /* set up the compute queues */
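	/*
	 * Compute ring i maps onto MEC 1, pipe i / 8, queue i % 8; each ring
	 * gets its own doorbell and its pipe's EOP interrupt source.
	 */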
2049 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2050 unsigned irq_type;
2051
2052 /* max 32 queues per MEC */
2053 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
2054 DRM_ERROR("Too many (%d) compute rings!\n", i);
2055 break;
2056 }
2057 ring = &adev->gfx.compute_ring[i];
2058 ring->ring_obj = NULL;
2059 ring->use_doorbell = true;
2060 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
2061 ring->me = 1; /* first MEC */
2062 ring->pipe = i / 8;
2063 ring->queue = i % 8;
771c8ec1 2064 sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue);
2065 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
2066 /* type-2 packets are deprecated on MEC, use type-3 instead */
2800de2e 2067 r = amdgpu_ring_init(adev, ring, 1024,
2068 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
2069 &adev->gfx.eop_irq, irq_type,
2070 AMDGPU_RING_TYPE_COMPUTE);
2071 if (r)
2072 return r;
2073 }
2074
2075 /* reserve GDS, GWS and OA resource for gfx */
2076 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
2077 PAGE_SIZE, true,
72d7668b 2078 AMDGPU_GEM_DOMAIN_GDS, 0, NULL,
2079 NULL, &adev->gds.gds_gfx_bo);
2080 if (r)
2081 return r;
2082
2083 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
2084 PAGE_SIZE, true,
72d7668b 2085 AMDGPU_GEM_DOMAIN_GWS, 0, NULL,
2086 NULL, &adev->gds.gws_gfx_bo);
2087 if (r)
2088 return r;
2089
2090 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
2091 PAGE_SIZE, true,
72d7668b 2092 AMDGPU_GEM_DOMAIN_OA, 0, NULL,
2093 NULL, &adev->gds.oa_gfx_bo);
2094 if (r)
2095 return r;
2096
2097 adev->gfx.ce_ram_size = 0x8000;
2098
2099 r = gfx_v8_0_gpu_early_init(adev);
2100 if (r)
2101 return r;
0bde3a95 2102
2103 return 0;
2104}
2105
5fc3aeeb 2106static int gfx_v8_0_sw_fini(void *handle)
2107{
2108 int i;
5fc3aeeb 2109 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2110
2111 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
2112 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
2113 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
2114
2115 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2116 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2117 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2118 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2119
2120 gfx_v8_0_mec_fini(adev);
2121
2122 gfx_v8_0_rlc_fini(adev);
2123
13331ac3 2124 gfx_v8_0_free_microcode(adev);
2b6cd977 2125
2126 return 0;
2127}
2128
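/*
 * Program the per-ASIC GB_TILE_MODE and GB_MACROTILE_MODE tables.  Entries a
 * given ASIC does not define (e.g. index 7 of the macrotile table) are left
 * at zero and skipped when the registers are written.
 */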
2129static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
2130{
90bea0ab 2131 uint32_t *modearray, *mod2array;
2132 const u32 num_tile_mode_states = ARRAY_SIZE(adev->gfx.config.tile_mode_array);
2133 const u32 num_secondary_tile_mode_states = ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
90bea0ab 2134 u32 reg_offset;
aaa36a97 2135
2136 modearray = adev->gfx.config.tile_mode_array;
2137 mod2array = adev->gfx.config.macrotile_mode_array;
2138
2139 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2140 modearray[reg_offset] = 0;
2141
2142 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2143 mod2array[reg_offset] = 0;
2144
2145 switch (adev->asic_type) {
2146 case CHIP_TOPAZ:
2147 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2148 PIPE_CONFIG(ADDR_SURF_P2) |
2149 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2150 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2151 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2152 PIPE_CONFIG(ADDR_SURF_P2) |
2153 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2154 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2155 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2156 PIPE_CONFIG(ADDR_SURF_P2) |
2157 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2158 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2159 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 PIPE_CONFIG(ADDR_SURF_P2) |
2161 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2162 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2163 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2164 PIPE_CONFIG(ADDR_SURF_P2) |
2165 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2166 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2167 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2168 PIPE_CONFIG(ADDR_SURF_P2) |
2169 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2170 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2171 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2172 PIPE_CONFIG(ADDR_SURF_P2) |
2173 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2174 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2175 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2176 PIPE_CONFIG(ADDR_SURF_P2));
2177 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178 PIPE_CONFIG(ADDR_SURF_P2) |
2179 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2180 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2181 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 PIPE_CONFIG(ADDR_SURF_P2) |
2183 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2184 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2186 PIPE_CONFIG(ADDR_SURF_P2) |
2187 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2188 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2189 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2190 PIPE_CONFIG(ADDR_SURF_P2) |
2191 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2192 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2193 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2194 PIPE_CONFIG(ADDR_SURF_P2) |
2195 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2196 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2198 PIPE_CONFIG(ADDR_SURF_P2) |
2199 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2200 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2201 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2202 PIPE_CONFIG(ADDR_SURF_P2) |
2203 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2204 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2205 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2206 PIPE_CONFIG(ADDR_SURF_P2) |
2207 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2208 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2209 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2210 PIPE_CONFIG(ADDR_SURF_P2) |
2211 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2212 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2213 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2214 PIPE_CONFIG(ADDR_SURF_P2) |
2215 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2216 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2217 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2218 PIPE_CONFIG(ADDR_SURF_P2) |
2219 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2220 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2221 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2222 PIPE_CONFIG(ADDR_SURF_P2) |
2223 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2224 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2225 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2226 PIPE_CONFIG(ADDR_SURF_P2) |
2227 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2228 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2229 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2230 PIPE_CONFIG(ADDR_SURF_P2) |
2231 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2232 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2233 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2234 PIPE_CONFIG(ADDR_SURF_P2) |
2235 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2236 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2237 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2238 PIPE_CONFIG(ADDR_SURF_P2) |
2239 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2240 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2241 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2242 PIPE_CONFIG(ADDR_SURF_P2) |
2243 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2244 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2245 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2246 PIPE_CONFIG(ADDR_SURF_P2) |
2247 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2248 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2249
2250 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2251 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2252 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2253 NUM_BANKS(ADDR_SURF_8_BANK));
2254 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2255 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2256 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2257 NUM_BANKS(ADDR_SURF_8_BANK));
2258 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2259 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2260 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2261 NUM_BANKS(ADDR_SURF_8_BANK));
2262 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2263 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2264 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2265 NUM_BANKS(ADDR_SURF_8_BANK));
2266 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2267 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2268 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2269 NUM_BANKS(ADDR_SURF_8_BANK));
2270 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2271 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2272 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2273 NUM_BANKS(ADDR_SURF_8_BANK));
2274 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2275 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2276 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2277 NUM_BANKS(ADDR_SURF_8_BANK));
2278 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2279 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2280 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2281 NUM_BANKS(ADDR_SURF_16_BANK));
2282 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2283 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2284 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2285 NUM_BANKS(ADDR_SURF_16_BANK));
2286 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2287 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2288 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2289 NUM_BANKS(ADDR_SURF_16_BANK));
2290 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2291 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2292 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2293 NUM_BANKS(ADDR_SURF_16_BANK));
2294 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2295 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2296 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2297 NUM_BANKS(ADDR_SURF_16_BANK));
2298 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2299 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2300 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2301 NUM_BANKS(ADDR_SURF_16_BANK));
2302 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2303 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2304 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2305 NUM_BANKS(ADDR_SURF_8_BANK));
2306
2307 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2308 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
2309 reg_offset != 23)
2310 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2311
2312 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2313 if (reg_offset != 7)
2314 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2315
8cdacf44 2316 break;
af15a2d5 2317 case CHIP_FIJI:
2318 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2320 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2321 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2322 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2323 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2324 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2325 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2326 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2327 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2328 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2329 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2330 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2332 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2333 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2334 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2335 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2336 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2337 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2338 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2339 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2340 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2341 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2342 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2343 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2344 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2345 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2346 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2347 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2348 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2349 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2350 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2351 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2352 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2353 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2354 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2356 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2358 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2359 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2361 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2362 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2363 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2364 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2365 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2366 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2368 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2369 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2370 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2371 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2373 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2374 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2375 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2376 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2377 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2378 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2380 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2381 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2382 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2383 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2384 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2385 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2386 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2388 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2389 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2390 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2392 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2393 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2394 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2395 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2396 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2397 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2398 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2399 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2400 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2401 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2402 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2403 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2404 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2405 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2407 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2408 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2409 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2410 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2411 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2412 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2413 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2414 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2415 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2416 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2417 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2419 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2420 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2421 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2422 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2423 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2424 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2425 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2426 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2427 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2428 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2430 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2431 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2432 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2433 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2434 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2436 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2438 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2439 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2440
2441 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2443 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 NUM_BANKS(ADDR_SURF_8_BANK));
2445 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2448 NUM_BANKS(ADDR_SURF_8_BANK));
2449 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2450 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2451 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2452 NUM_BANKS(ADDR_SURF_8_BANK));
2453 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2456 NUM_BANKS(ADDR_SURF_8_BANK));
2457 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2460 NUM_BANKS(ADDR_SURF_8_BANK));
2461 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2462 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2463 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2464 NUM_BANKS(ADDR_SURF_8_BANK));
2465 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 NUM_BANKS(ADDR_SURF_8_BANK));
2469 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2472 NUM_BANKS(ADDR_SURF_8_BANK));
2473 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2475 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2476 NUM_BANKS(ADDR_SURF_8_BANK));
2477 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2479 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2480 NUM_BANKS(ADDR_SURF_8_BANK));
2481 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2484 NUM_BANKS(ADDR_SURF_8_BANK));
2485 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2486 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2487 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2488 NUM_BANKS(ADDR_SURF_8_BANK));
2489 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2492 NUM_BANKS(ADDR_SURF_8_BANK));
2493 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2494 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2495 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2496 NUM_BANKS(ADDR_SURF_4_BANK));
2497
2498 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2499 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2500
2501 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2502 if (reg_offset != 7)
2503 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2504
5f2e816b 2505 break;
aaa36a97 2506 case CHIP_TONGA:
2507 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2509 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2510 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2511 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2512 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2513 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2514 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2515 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2516 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2517 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2518 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2519 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2521 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2522 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2523 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2525 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2526 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2527 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2528 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2529 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2530 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2531 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2532 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2533 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2534 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2535 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2536 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2537 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2538 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2539 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2540 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2541 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2542 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2543 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2544 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2545 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2546 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2547 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2548 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2549 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2550 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2551 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2552 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2553 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2554 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2555 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2556 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2557 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2558 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2559 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2560 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2561 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2562 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2563 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2564 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2565 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2566 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2567 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2568 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2569 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2570 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2571 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2572 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2573 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2574 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2575 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2576 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2577 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2578 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2579 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2580 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2581 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2582 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2583 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2584 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2585 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2586 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2587 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2588 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2589 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2590 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2591 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2592 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2593 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2594 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2595 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2596 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2597 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2598 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2599 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2600 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2601 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2602 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2603 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2604 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2605 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2606 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2607 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2608 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2609 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2610 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2611 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2612 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2613 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2615 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2616 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2617 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2618 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2619 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2620 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2621 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2622 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2623 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2624 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2625 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2626 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2627 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2629
2630 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2631 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2632 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2633 NUM_BANKS(ADDR_SURF_16_BANK));
2634 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2635 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2636 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2637 NUM_BANKS(ADDR_SURF_16_BANK));
2638 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2640 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2641 NUM_BANKS(ADDR_SURF_16_BANK));
2642 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2643 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2644 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2645 NUM_BANKS(ADDR_SURF_16_BANK));
2646 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2647 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2648 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2649 NUM_BANKS(ADDR_SURF_16_BANK));
2650 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2653 NUM_BANKS(ADDR_SURF_16_BANK));
2654 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2657 NUM_BANKS(ADDR_SURF_16_BANK));
2658 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2659 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2660 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2661 NUM_BANKS(ADDR_SURF_16_BANK));
2662 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2663 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2664 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2665 NUM_BANKS(ADDR_SURF_16_BANK));
2666 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2667 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2668 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2669 NUM_BANKS(ADDR_SURF_16_BANK));
2670 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2671 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2672 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2673 NUM_BANKS(ADDR_SURF_16_BANK));
2674 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2676 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2677 NUM_BANKS(ADDR_SURF_8_BANK));
2678 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2679 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2680 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2681 NUM_BANKS(ADDR_SURF_4_BANK));
2682 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2683 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2684 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2685 NUM_BANKS(ADDR_SURF_4_BANK));
2686
2687 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2688 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2689
2690 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2691 if (reg_offset != 7)
2692 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2693
68182d90 2694 break;
2cc0c0b5 2695 case CHIP_POLARIS11:
2696 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2697 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2698 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2699 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2700 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2702 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2703 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2704 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2706 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2707 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2708 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2709 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2710 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2711 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2712 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2713 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2714 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2715 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2716 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2717 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2718 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2719 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2720 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2721 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2722 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2723 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2724 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2725 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2726 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2727 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2728 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2729 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2730 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2731 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2732 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2733 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2734 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2736 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2737 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2738 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2739 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2740 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2741 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2742 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2743 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2744 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2745 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2746 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2747 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2748 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2749 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2750 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2751 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2752 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2753 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2754 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2755 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2756 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2757 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2758 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2759 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2760 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2761 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2762 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2763 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2764 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2765 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2766 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2767 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2768 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2769 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2770 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2771 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2772 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2773 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2774 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2775 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2776 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2777 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2778 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2779 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2780 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2781 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2782 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2783 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2784 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2785 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2786 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2787 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2788 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2789 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2790 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2791 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2792 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2793 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2794 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2795 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2796 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2797 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2798 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
2799 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2800 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2801 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2802 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2803 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2804 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2805 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2806 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2808 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2809 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2810 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2811 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2812 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2813 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2814 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2815 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2816 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2817 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2818
2819 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2820 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2821 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2822 NUM_BANKS(ADDR_SURF_16_BANK));
2823
2824 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2826 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2827 NUM_BANKS(ADDR_SURF_16_BANK));
2828
2829 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2831 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2832 NUM_BANKS(ADDR_SURF_16_BANK));
2833
2834 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2835 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2836 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2837 NUM_BANKS(ADDR_SURF_16_BANK));
2838
2839 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2840 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2841 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2842 NUM_BANKS(ADDR_SURF_16_BANK));
2843
2844 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2846 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2847 NUM_BANKS(ADDR_SURF_16_BANK));
2848
2849 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2850 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2851 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2852 NUM_BANKS(ADDR_SURF_16_BANK));
2853
2854 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2855 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2856 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2857 NUM_BANKS(ADDR_SURF_16_BANK));
2858
2859 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2860 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2861 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2862 NUM_BANKS(ADDR_SURF_16_BANK));
2863
2864 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2865 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2866 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2867 NUM_BANKS(ADDR_SURF_16_BANK));
2868
2869 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2871 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2872 NUM_BANKS(ADDR_SURF_16_BANK));
2873
2874 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2875 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2876 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2877 NUM_BANKS(ADDR_SURF_16_BANK));
2878
2879 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2880 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2881 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2882 NUM_BANKS(ADDR_SURF_8_BANK));
2883
2884 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2885 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2886 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2887 NUM_BANKS(ADDR_SURF_4_BANK));
2888
2889 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2890 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
2891
2892 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
2893 if (reg_offset != 7)
2894 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
2895
2896 break;
2897 	case CHIP_POLARIS10:
2898 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2899 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2900 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2901 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2902 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2904 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2905 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2906 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2907 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2908 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2909 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2910 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2911 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2912 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2913 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2914 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2915 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2916 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2917 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2918 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2920 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2921 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2922 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2923 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2924 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2925 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2926 modearray[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2927 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2928 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
2929 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2930 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2931 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2932 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2933 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2934 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2935 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2936 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2937 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2938 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2939 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2940 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2942 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2943 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2944 modearray[12] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2945 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2946 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2947 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2948 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2949 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2950 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2951 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2952 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2953 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2954 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2955 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2956 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
2957 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2958 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2960 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2961 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2962 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2963 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2964 modearray[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2965 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2966 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2967 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
2968 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2969 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2970 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2972 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
2973 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2974 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2975 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2976 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2977 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2978 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2979 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2980 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
2981 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2982 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2983 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2984 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2985 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2986 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2987 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2988 modearray[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
2989 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2990 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2991 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2992 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
2993 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2994 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2995 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
2996 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
2997 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2998 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
2999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3000 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3001 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3002 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3003 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3004 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3005 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3006 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3007 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3008 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3009 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3010 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3012 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3013 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
3014 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3015 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3016 modearray[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3017 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
3018 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3019 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3020
3021 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3022 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3023 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3024 NUM_BANKS(ADDR_SURF_16_BANK));
3025
3026 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3027 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3028 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3029 NUM_BANKS(ADDR_SURF_16_BANK));
3030
3031 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3033 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 NUM_BANKS(ADDR_SURF_16_BANK));
3035
3036 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3037 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3038 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3039 NUM_BANKS(ADDR_SURF_16_BANK));
3040
3041 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3042 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3043 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3044 NUM_BANKS(ADDR_SURF_16_BANK));
3045
3046 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3047 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3048 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3049 NUM_BANKS(ADDR_SURF_16_BANK));
3050
3051 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3052 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3053 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3054 NUM_BANKS(ADDR_SURF_16_BANK));
3055
3056 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3057 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3058 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3059 NUM_BANKS(ADDR_SURF_16_BANK));
3060
3061 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3062 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3063 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064 NUM_BANKS(ADDR_SURF_16_BANK));
3065
3066 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3067 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3068 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3069 NUM_BANKS(ADDR_SURF_16_BANK));
3070
3071 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3072 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3073 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3074 NUM_BANKS(ADDR_SURF_16_BANK));
3075
3076 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3077 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3078 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3079 NUM_BANKS(ADDR_SURF_8_BANK));
3080
3081 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3083 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3084 NUM_BANKS(ADDR_SURF_4_BANK));
3085
3086 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3087 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3088 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3089 NUM_BANKS(ADDR_SURF_4_BANK));
3090
3091 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3092 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3093
3094 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3095 if (reg_offset != 7)
3096 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3097
3098 		break;
3099 	case CHIP_STONEY:
3100 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3101 PIPE_CONFIG(ADDR_SURF_P2) |
3102 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3103 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3104 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3105 PIPE_CONFIG(ADDR_SURF_P2) |
3106 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3107 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3108 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3109 PIPE_CONFIG(ADDR_SURF_P2) |
3110 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3111 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3112 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3113 PIPE_CONFIG(ADDR_SURF_P2) |
3114 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3115 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3116 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3117 PIPE_CONFIG(ADDR_SURF_P2) |
3118 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3119 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3120 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3121 PIPE_CONFIG(ADDR_SURF_P2) |
3122 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3123 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3124 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3125 PIPE_CONFIG(ADDR_SURF_P2) |
3126 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3127 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3128 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3129 PIPE_CONFIG(ADDR_SURF_P2));
3130 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3131 PIPE_CONFIG(ADDR_SURF_P2) |
3132 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3133 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3134 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3135 PIPE_CONFIG(ADDR_SURF_P2) |
3136 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3137 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3138 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3139 PIPE_CONFIG(ADDR_SURF_P2) |
3140 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3141 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3142 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3143 PIPE_CONFIG(ADDR_SURF_P2) |
3144 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3145 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3146 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3147 PIPE_CONFIG(ADDR_SURF_P2) |
3148 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3149 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3150 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3151 PIPE_CONFIG(ADDR_SURF_P2) |
3152 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3153 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3154 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3155 PIPE_CONFIG(ADDR_SURF_P2) |
3156 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3157 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3158 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3159 PIPE_CONFIG(ADDR_SURF_P2) |
3160 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3161 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3162 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3163 PIPE_CONFIG(ADDR_SURF_P2) |
3164 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3165 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3166 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3167 PIPE_CONFIG(ADDR_SURF_P2) |
3168 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3169 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3170 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3171 PIPE_CONFIG(ADDR_SURF_P2) |
3172 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3173 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3174 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3175 PIPE_CONFIG(ADDR_SURF_P2) |
3176 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3177 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3178 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3179 PIPE_CONFIG(ADDR_SURF_P2) |
3180 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3181 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3182 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3183 PIPE_CONFIG(ADDR_SURF_P2) |
3184 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3185 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3186 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3187 PIPE_CONFIG(ADDR_SURF_P2) |
3188 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3189 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3190 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3191 PIPE_CONFIG(ADDR_SURF_P2) |
3192 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3193 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3194 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3195 PIPE_CONFIG(ADDR_SURF_P2) |
3196 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3197 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3198 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3199 PIPE_CONFIG(ADDR_SURF_P2) |
3200 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3201 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3202
3203 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3204 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3205 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3206 NUM_BANKS(ADDR_SURF_8_BANK));
3207 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3208 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3209 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3210 NUM_BANKS(ADDR_SURF_8_BANK));
3211 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3212 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3213 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3214 NUM_BANKS(ADDR_SURF_8_BANK));
3215 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3216 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3217 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3218 NUM_BANKS(ADDR_SURF_8_BANK));
3219 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3220 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3221 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3222 NUM_BANKS(ADDR_SURF_8_BANK));
3223 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3224 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3225 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3226 NUM_BANKS(ADDR_SURF_8_BANK));
3227 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3228 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3229 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3230 NUM_BANKS(ADDR_SURF_8_BANK));
3231 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3232 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3233 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3234 NUM_BANKS(ADDR_SURF_16_BANK));
3235 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3236 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3237 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3238 NUM_BANKS(ADDR_SURF_16_BANK));
3239 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3240 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3241 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3242 NUM_BANKS(ADDR_SURF_16_BANK));
3243 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3244 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3245 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3246 NUM_BANKS(ADDR_SURF_16_BANK));
3247 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3248 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3249 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3250 NUM_BANKS(ADDR_SURF_16_BANK));
3251 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3252 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3253 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3254 NUM_BANKS(ADDR_SURF_16_BANK));
3255 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3256 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3257 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3258 NUM_BANKS(ADDR_SURF_8_BANK));
3259
3260 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3261 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3262 reg_offset != 23)
3263 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3264
3265 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3266 if (reg_offset != 7)
3267 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3268
3269 		break;
3270 	default:
3271 dev_warn(adev->dev,
3272 "Unknown chip type (%d) in function gfx_v8_0_tiling_mode_table_init() falling through to CHIP_CARRIZO\n",
3273 adev->asic_type);
3274
3275 case CHIP_CARRIZO:
3276 modearray[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3277 PIPE_CONFIG(ADDR_SURF_P2) |
3278 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
3279 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3280 modearray[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3281 PIPE_CONFIG(ADDR_SURF_P2) |
3282 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
3283 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3284 modearray[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3285 PIPE_CONFIG(ADDR_SURF_P2) |
3286 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
3287 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3288 modearray[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3289 PIPE_CONFIG(ADDR_SURF_P2) |
3290 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
3291 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3292 modearray[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3293 PIPE_CONFIG(ADDR_SURF_P2) |
3294 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3295 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3296 modearray[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3297 PIPE_CONFIG(ADDR_SURF_P2) |
3298 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3299 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3300 modearray[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3301 PIPE_CONFIG(ADDR_SURF_P2) |
3302 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
3303 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3304 modearray[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3305 PIPE_CONFIG(ADDR_SURF_P2));
3306 modearray[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3307 PIPE_CONFIG(ADDR_SURF_P2) |
3308 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3309 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3310 modearray[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3311 PIPE_CONFIG(ADDR_SURF_P2) |
3312 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3314 modearray[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3315 PIPE_CONFIG(ADDR_SURF_P2) |
3316 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3317 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3318 modearray[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3319 PIPE_CONFIG(ADDR_SURF_P2) |
3320 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3321 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3322 modearray[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3323 PIPE_CONFIG(ADDR_SURF_P2) |
3324 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3326 modearray[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
3327 PIPE_CONFIG(ADDR_SURF_P2) |
3328 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3329 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3330 modearray[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3331 PIPE_CONFIG(ADDR_SURF_P2) |
3332 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3333 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3334 modearray[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3335 PIPE_CONFIG(ADDR_SURF_P2) |
3336 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3338 modearray[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
3339 PIPE_CONFIG(ADDR_SURF_P2) |
3340 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3341 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3342 modearray[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3343 PIPE_CONFIG(ADDR_SURF_P2) |
3344 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3345 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3346 modearray[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
3347 PIPE_CONFIG(ADDR_SURF_P2) |
3348 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3350 modearray[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
3351 PIPE_CONFIG(ADDR_SURF_P2) |
3352 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3353 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3354 modearray[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
3355 PIPE_CONFIG(ADDR_SURF_P2) |
3356 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3357 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3358 modearray[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
3359 PIPE_CONFIG(ADDR_SURF_P2) |
3360 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3362 modearray[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
3363 PIPE_CONFIG(ADDR_SURF_P2) |
3364 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
3365 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
3366 modearray[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3367 PIPE_CONFIG(ADDR_SURF_P2) |
3368 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3369 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3370 modearray[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3371 PIPE_CONFIG(ADDR_SURF_P2) |
3372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3374 modearray[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3375 PIPE_CONFIG(ADDR_SURF_P2) |
3376 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3377 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
3378
3379 mod2array[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3380 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3381 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3382 NUM_BANKS(ADDR_SURF_8_BANK));
3383 mod2array[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3384 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3385 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3386 NUM_BANKS(ADDR_SURF_8_BANK));
3387 mod2array[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3388 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3389 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3390 NUM_BANKS(ADDR_SURF_8_BANK));
3391 mod2array[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3392 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3393 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3394 NUM_BANKS(ADDR_SURF_8_BANK));
3395 mod2array[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3396 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3397 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3398 NUM_BANKS(ADDR_SURF_8_BANK));
3399 mod2array[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3400 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3401 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3402 NUM_BANKS(ADDR_SURF_8_BANK));
3403 mod2array[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3406 NUM_BANKS(ADDR_SURF_8_BANK));
3407 mod2array[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3408 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3409 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3410 NUM_BANKS(ADDR_SURF_16_BANK));
3411 mod2array[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3412 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3413 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3414 NUM_BANKS(ADDR_SURF_16_BANK));
3415 mod2array[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3418 NUM_BANKS(ADDR_SURF_16_BANK));
3419 mod2array[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3420 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3421 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3422 NUM_BANKS(ADDR_SURF_16_BANK));
3423 mod2array[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3424 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3425 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3426 NUM_BANKS(ADDR_SURF_16_BANK));
3427 mod2array[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3430 NUM_BANKS(ADDR_SURF_16_BANK));
3431 mod2array[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3432 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3433 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3434 NUM_BANKS(ADDR_SURF_8_BANK));
3435
3436 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
3437 if (reg_offset != 7 && reg_offset != 12 && reg_offset != 17 &&
3438 reg_offset != 23)
3439 WREG32(mmGB_TILE_MODE0 + reg_offset, modearray[reg_offset]);
3440
3441 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
3442 if (reg_offset != 7)
3443 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, mod2array[reg_offset]);
3444
3445 break;
3446 }
3447 }
3448
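/* A se_num, sh_num or instance of 0xffffffff below selects broadcast writes
 * to all shader engines, shader arrays or instances via GRBM_GFX_INDEX
 * instead of indexing a single one.
 */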
3449 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
3450 				  u32 se_num, u32 sh_num, u32 instance)
3451 {
3452 u32 data;
3453
3454 if (instance == 0xffffffff)
3455 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
3456 else
3457 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_INDEX, instance);
3458
3459 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
3460 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3461 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3462 } else if (se_num == 0xffffffff) {
3463 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3464 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
3465 } else if (sh_num == 0xffffffff) {
3466 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
3467 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3468 } else {
3469 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
3470 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
3471 }
3472 WREG32(mmGRBM_GFX_INDEX, data);
3473}
3474
3475 static u32 gfx_v8_0_create_bitmask(u32 bit_width)
3476 {
3477 return (u32)((1ULL << bit_width) - 1);
3478}
3479
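/* Returns the bitmap of render backends that are enabled for the currently
 * selected SE/SH, i.e. not disabled in either CC_RB_BACKEND_DISABLE or
 * GC_USER_RB_BACKEND_DISABLE.
 */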
3480 static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
3481 {
3482 u32 data, mask;
3483
3484 data = RREG32(mmCC_RB_BACKEND_DISABLE);
3485 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
3486
3487 	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
3488 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
3489
3490 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
3491 adev->gfx.config.max_sh_per_se);
3492 
3493 	return (~data) & mask;
3494 }
3495
3496 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
3497 {
3498 int i, j;
3499 	u32 data;
3500 	u32 active_rbs = 0;
3501 u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
3502 adev->gfx.config.max_sh_per_se;
3503
3504 mutex_lock(&adev->grbm_idx_mutex);
3505 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3506 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3507 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3508 data = gfx_v8_0_get_rb_active_bitmap(adev);
3509 active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
3510 					rb_bitmap_width_per_sh);
3511 }
3512 }
3513 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3514 mutex_unlock(&adev->grbm_idx_mutex);
3515
3516 	adev->gfx.config.backend_enable_mask = active_rbs;
3517 	adev->gfx.config.num_rbs = hweight32(active_rbs);
3518 }
3519
3520 /**
3521  * gfx_v8_0_init_compute_vmid - initialize the compute VMIDs
3522  *
3523  * @adev: amdgpu_device pointer
3524 *
3525 * Initialize compute vmid sh_mem registers
3526 *
3527 */
3528 #define DEFAULT_SH_MEM_BASES (0x6000)
3529 #define FIRST_COMPUTE_VMID (8)
3530 #define LAST_COMPUTE_VMID (16)
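/* VMIDs FIRST_COMPUTE_VMID..LAST_COMPUTE_VMID-1 (8..15) get compute-oriented
 * SH_MEM aperture settings here, overriding the defaults programmed for all
 * VMIDs in gfx_v8_0_gpu_init().
 */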
3531 static void gfx_v8_0_init_compute_vmid(struct amdgpu_device *adev)
3532 {
3533 int i;
3534 uint32_t sh_mem_config;
3535 uint32_t sh_mem_bases;
3536
3537 /*
3538 * Configure apertures:
3539 * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB)
3540 * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB)
3541 * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB)
3542 */
3543 sh_mem_bases = DEFAULT_SH_MEM_BASES | (DEFAULT_SH_MEM_BASES << 16);
3544
3545 sh_mem_config = SH_MEM_ADDRESS_MODE_HSA64 <<
3546 SH_MEM_CONFIG__ADDRESS_MODE__SHIFT |
3547 SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
3548 SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT |
3549 MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT |
3550 SH_MEM_CONFIG__PRIVATE_ATC_MASK;
3551
3552 mutex_lock(&adev->srbm_mutex);
3553 for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
3554 vi_srbm_select(adev, 0, 0, 0, i);
3555 /* CP and shaders */
3556 WREG32(mmSH_MEM_CONFIG, sh_mem_config);
3557 WREG32(mmSH_MEM_APE1_BASE, 1);
3558 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3559 WREG32(mmSH_MEM_BASES, sh_mem_bases);
3560 }
3561 vi_srbm_select(adev, 0, 0, 0, 0);
3562 mutex_unlock(&adev->srbm_mutex);
3563}
3564
3565 static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
3566 {
3567 u32 tmp;
3568 int i;
3569
3570 tmp = RREG32(mmGRBM_CNTL);
3571 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
3572 WREG32(mmGRBM_CNTL, tmp);
3573
3574 WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3575 WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
3576 WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
3577
3578 gfx_v8_0_tiling_mode_table_init(adev);
3579
3580 	gfx_v8_0_setup_rb(adev);
3581 	gfx_v8_0_get_cu_info(adev);
3582
3583 /* XXX SH_MEM regs */
3584 /* where to put LDS, scratch, GPUVM in FSA64 space */
3585 mutex_lock(&adev->srbm_mutex);
3586 for (i = 0; i < 16; i++) {
3587 vi_srbm_select(adev, 0, 0, 0, i);
3588 /* CP and shaders */
3589 if (i == 0) {
3590 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
3591 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
3592 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3593 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3594 WREG32(mmSH_MEM_CONFIG, tmp);
3595 } else {
3596 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
3597 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
3598 			tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, ALIGNMENT_MODE,
3599 					    SH_MEM_ALIGNMENT_MODE_UNALIGNED);
3600 WREG32(mmSH_MEM_CONFIG, tmp);
3601 }
3602
3603 WREG32(mmSH_MEM_APE1_BASE, 1);
3604 WREG32(mmSH_MEM_APE1_LIMIT, 0);
3605 WREG32(mmSH_MEM_BASES, 0);
3606 }
3607 vi_srbm_select(adev, 0, 0, 0, 0);
3608 mutex_unlock(&adev->srbm_mutex);
3609
3610 	gfx_v8_0_init_compute_vmid(adev);
3611 
3612 mutex_lock(&adev->grbm_idx_mutex);
3613 /*
3614 	 * making sure that the following register writes will be broadcast
3615 	 * to all the shaders
3616 */
3617 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3618
3619 WREG32(mmPA_SC_FIFO_SIZE,
3620 (adev->gfx.config.sc_prim_fifo_size_frontend <<
3621 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
3622 (adev->gfx.config.sc_prim_fifo_size_backend <<
3623 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
3624 (adev->gfx.config.sc_hiz_tile_fifo_size <<
3625 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
3626 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
3627 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
3628 mutex_unlock(&adev->grbm_idx_mutex);
3629
3630}
3631
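/* Poll each SE/SH until the RLC serdes CU masters report idle, then wait for
 * the non-CU masters (SE/GC/TC0/TC1) to go idle as well.
 */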
3632 static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
3633 {
3634 u32 i, j, k;
3635 u32 mask;
3636
3637 mutex_lock(&adev->grbm_idx_mutex);
3638 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3639 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
3640 			gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
3641 for (k = 0; k < adev->usec_timeout; k++) {
3642 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
3643 break;
3644 udelay(1);
3645 }
3646 }
3647 }
3648 	gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
3649 mutex_unlock(&adev->grbm_idx_mutex);
3650
3651 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
3652 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
3653 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
3654 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
3655 for (k = 0; k < adev->usec_timeout; k++) {
3656 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
3657 break;
3658 udelay(1);
3659 }
3660}
3661
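/* Toggle the ring0 context busy/empty, CMP busy and GFX idle interrupt
 * sources together in CP_INT_CNTL_RING0.
 */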
3662 static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
3663 					       bool enable)
3664 {
3665 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
3666
3667 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, enable ? 1 : 0);
3668 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, enable ? 1 : 0);
3669 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, enable ? 1 : 0);
3670 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, enable ? 1 : 0);
3671
3672 WREG32(mmCP_INT_CNTL_RING0, tmp);
3673}
3674
3675 static void gfx_v8_0_init_csb(struct amdgpu_device *adev)
3676 {
3677 /* csib */
3678 WREG32(mmRLC_CSIB_ADDR_HI,
3679 adev->gfx.rlc.clear_state_gpu_addr >> 32);
3680 WREG32(mmRLC_CSIB_ADDR_LO,
3681 adev->gfx.rlc.clear_state_gpu_addr & 0xfffffffc);
3682 WREG32(mmRLC_CSIB_LENGTH,
3683 adev->gfx.rlc.clear_state_size);
3684}
3685
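/* Walk the RLC register_list_format blob: entries are separated by
 * 0xFFFFFFFF markers, the offset of each entry is recorded in
 * ind_start_offsets, and every third dword is collected into unique_indices
 * and rewritten as its index into that table.
 */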
3686 static void gfx_v8_0_parse_ind_reg_list(int *register_list_format,
3687 int ind_offset,
3688 int list_size,
3689 int *unique_indices,
3690 int *indices_count,
3691 int max_indices,
3692 int *ind_start_offsets,
3693 int *offset_count,
3694 int max_offset)
3695{
3696 int indices;
3697 bool new_entry = true;
3698
3699 for (; ind_offset < list_size; ind_offset++) {
3700
3701 if (new_entry) {
3702 new_entry = false;
3703 ind_start_offsets[*offset_count] = ind_offset;
3704 *offset_count = *offset_count + 1;
3705 BUG_ON(*offset_count >= max_offset);
3706 }
3707
3708 if (register_list_format[ind_offset] == 0xFFFFFFFF) {
3709 new_entry = true;
3710 continue;
3711 }
3712
3713 ind_offset += 2;
3714
3715 		/* look for a matching index */
3716 for (indices = 0;
3717 indices < *indices_count;
3718 indices++) {
3719 if (unique_indices[indices] ==
3720 register_list_format[ind_offset])
3721 break;
3722 }
3723
3724 if (indices >= *indices_count) {
3725 unique_indices[*indices_count] =
3726 register_list_format[ind_offset];
3727 indices = *indices_count;
3728 *indices_count = *indices_count + 1;
3729 BUG_ON(*indices_count >= max_indices);
3730 }
3731
3732 register_list_format[ind_offset] = indices;
3733 }
3734}
3735
3736 static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev)
3737 {
3738 int i, temp, data;
3739 int unique_indices[] = {0, 0, 0, 0, 0, 0, 0, 0};
3740 int indices_count = 0;
3741 int indirect_start_offsets[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
3742 int offset_count = 0;
3743
3744 int list_size;
3745 unsigned int *register_list_format =
3746 kmalloc(adev->gfx.rlc.reg_list_format_size_bytes, GFP_KERNEL);
3747 if (register_list_format == NULL)
3748 return -ENOMEM;
3749 memcpy(register_list_format, adev->gfx.rlc.register_list_format,
3750 adev->gfx.rlc.reg_list_format_size_bytes);
3751
3752 gfx_v8_0_parse_ind_reg_list(register_list_format,
3753 RLC_FormatDirectRegListLength,
3754 adev->gfx.rlc.reg_list_format_size_bytes >> 2,
3755 unique_indices,
3756 &indices_count,
3757 sizeof(unique_indices) / sizeof(int),
3758 indirect_start_offsets,
3759 &offset_count,
3760 sizeof(indirect_start_offsets)/sizeof(int));
3761
3762 /* save and restore list */
3763 temp = RREG32(mmRLC_SRM_CNTL);
3764 temp |= RLC_SRM_CNTL__AUTO_INCR_ADDR_MASK;
3765 WREG32(mmRLC_SRM_CNTL, temp);
3766
3767 WREG32(mmRLC_SRM_ARAM_ADDR, 0);
3768 for (i = 0; i < adev->gfx.rlc.reg_list_size_bytes >> 2; i++)
3769 WREG32(mmRLC_SRM_ARAM_DATA, adev->gfx.rlc.register_restore[i]);
3770
3771 /* indirect list */
3772 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_list_format_start);
3773 for (i = 0; i < adev->gfx.rlc.reg_list_format_size_bytes >> 2; i++)
3774 WREG32(mmRLC_GPM_SCRATCH_DATA, register_list_format[i]);
3775
3776 list_size = adev->gfx.rlc.reg_list_size_bytes >> 2;
3777 list_size = list_size >> 1;
3778 WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.reg_restore_list_size);
3779 WREG32(mmRLC_GPM_SCRATCH_DATA, list_size);
3780
3781 	/* starting offsets */
3782 WREG32(mmRLC_GPM_SCRATCH_ADDR,
3783 adev->gfx.rlc.starting_offsets_start);
3784 for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++)
3785 WREG32(mmRLC_GPM_SCRATCH_DATA,
3786 indirect_start_offsets[i]);
3787
3788 /* unique indices */
3789 temp = mmRLC_SRM_INDEX_CNTL_ADDR_0;
3790 data = mmRLC_SRM_INDEX_CNTL_DATA_0;
3791 for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) {
3792 amdgpu_mm_wreg(adev, temp + i, unique_indices[i] & 0x3FFFF, false);
3793 amdgpu_mm_wreg(adev, data + i, unique_indices[i] >> 20, false);
3794 }
3795 kfree(register_list_format);
3796
3797 return 0;
3798}
3799
3800 static void gfx_v8_0_enable_save_restore_machine(struct amdgpu_device *adev)
3801 {
3802 uint32_t data;
3803
3804 data = RREG32(mmRLC_SRM_CNTL);
3805 data |= RLC_SRM_CNTL__SRM_ENABLE_MASK;
3806 WREG32(mmRLC_SRM_CNTL, data);
3807}
3808
3809 static void gfx_v8_0_init_power_gating(struct amdgpu_device *adev)
3810 {
3811 uint32_t data;
3812
3813 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3814 AMD_PG_SUPPORT_GFX_SMG |
3815 AMD_PG_SUPPORT_GFX_DMG)) {
3816 data = RREG32(mmCP_RB_WPTR_POLL_CNTL);
3817 data &= ~CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT_MASK;
3818 data |= (0x60 << CP_RB_WPTR_POLL_CNTL__IDLE_POLL_COUNT__SHIFT);
3819 WREG32(mmCP_RB_WPTR_POLL_CNTL, data);
3820
3821 data = 0;
3822 data |= (0x10 << RLC_PG_DELAY__POWER_UP_DELAY__SHIFT);
3823 data |= (0x10 << RLC_PG_DELAY__POWER_DOWN_DELAY__SHIFT);
3824 data |= (0x10 << RLC_PG_DELAY__CMD_PROPAGATE_DELAY__SHIFT);
3825 data |= (0x10 << RLC_PG_DELAY__MEM_SLEEP_DELAY__SHIFT);
3826 WREG32(mmRLC_PG_DELAY, data);
3827
3828 data = RREG32(mmRLC_PG_DELAY_2);
3829 data &= ~RLC_PG_DELAY_2__SERDES_CMD_DELAY_MASK;
3830 data |= (0x3 << RLC_PG_DELAY_2__SERDES_CMD_DELAY__SHIFT);
3831 WREG32(mmRLC_PG_DELAY_2, data);
3832
3833 data = RREG32(mmRLC_AUTO_PG_CTRL);
3834 data &= ~RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD_MASK;
3835 data |= (0x55f0 << RLC_AUTO_PG_CTRL__GRBM_REG_SAVE_GFX_IDLE_THRESHOLD__SHIFT);
3836 WREG32(mmRLC_AUTO_PG_CTRL, data);
3837 }
3838}
3839
3840 static void cz_enable_sck_slow_down_on_power_up(struct amdgpu_device *adev,
3841 bool enable)
3842{
3843 u32 data, orig;
3844
3845 orig = data = RREG32(mmRLC_PG_CNTL);
3846
3847 if (enable)
3848 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3849 else
3850 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
3851
3852 if (orig != data)
3853 WREG32(mmRLC_PG_CNTL, data);
3854}
3855
3856 static void cz_enable_sck_slow_down_on_power_down(struct amdgpu_device *adev,
3857 						  bool enable)
3858 {
3859 u32 data, orig;
3860
3861 orig = data = RREG32(mmRLC_PG_CNTL);
3862
3863 if (enable)
3864 data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3865 else
3866 data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
3867
3868 if (orig != data)
3869 WREG32(mmRLC_PG_CNTL, data);
3870}
3871
3872 static void cz_enable_cp_power_gating(struct amdgpu_device *adev, bool enable)
3873 {
3874 u32 data, orig;
3875
3876 orig = data = RREG32(mmRLC_PG_CNTL);
3877
3878 if (enable)
3879 data &= ~RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3880 else
3881 data |= RLC_PG_CNTL__CP_PG_DISABLE_MASK;
3882
3883 if (orig != data)
3884 WREG32(mmRLC_PG_CNTL, data);
3885}
3886
3887 static void gfx_v8_0_init_pg(struct amdgpu_device *adev)
3888 {
3889 if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
3890 AMD_PG_SUPPORT_GFX_SMG |
3891 AMD_PG_SUPPORT_GFX_DMG |
3892 AMD_PG_SUPPORT_CP |
3893 AMD_PG_SUPPORT_GDS |
3894 AMD_PG_SUPPORT_RLC_SMU_HS)) {
3895 gfx_v8_0_init_csb(adev);
3896 gfx_v8_0_init_save_restore_list(adev);
3897 gfx_v8_0_enable_save_restore_machine(adev);
3898 
3899 if ((adev->asic_type == CHIP_CARRIZO) ||
3900 (adev->asic_type == CHIP_STONEY)) {
3901 WREG32(mmRLC_JUMP_TABLE_RESTORE, adev->gfx.rlc.cp_table_gpu_addr >> 8);
3902 gfx_v8_0_init_power_gating(adev);
3903 WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
3904 if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) {
3905 cz_enable_sck_slow_down_on_power_up(adev, true);
3906 cz_enable_sck_slow_down_on_power_down(adev, true);
3907 } else {
3908 cz_enable_sck_slow_down_on_power_up(adev, false);
3909 cz_enable_sck_slow_down_on_power_down(adev, false);
3910 }
3911 if (adev->pg_flags & AMD_PG_SUPPORT_CP)
3912 cz_enable_cp_power_gating(adev, true);
3913 else
3914 cz_enable_cp_power_gating(adev, false);
3915 } else if (adev->asic_type == CHIP_POLARIS11) {
3916 gfx_v8_0_init_power_gating(adev);
3917 }
3918 }
3919}
3920
3921 void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
3922 {
3923 u32 tmp = RREG32(mmRLC_CNTL);
3924
3925 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
3926 WREG32(mmRLC_CNTL, tmp);
3927
3928 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3929
3930 gfx_v8_0_wait_for_rlc_serdes(adev);
3931}
3932
3933 static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
3934 {
3935 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
3936
3937 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3938 WREG32(mmGRBM_SOFT_RESET, tmp);
3939 udelay(50);
3940 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
3941 WREG32(mmGRBM_SOFT_RESET, tmp);
3942 udelay(50);
3943}
3944
3945 static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
3946 {
3947 u32 tmp = RREG32(mmRLC_CNTL);
3948
3949 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
3950 WREG32(mmRLC_CNTL, tmp);
3951
3952 	/* carrizo/APUs enable the cp interrupt only after the cp is initialized */
3953 	if (!(adev->flags & AMD_IS_APU))
3954 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3955
3956 udelay(50);
3957}
3958
3959 static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
3960 {
3961 const struct rlc_firmware_header_v2_0 *hdr;
3962 const __le32 *fw_data;
3963 unsigned i, fw_size;
3964
3965 if (!adev->gfx.rlc_fw)
3966 return -EINVAL;
3967
3968 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
3969 amdgpu_ucode_print_rlc_hdr(&hdr->header);
3970
3971 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
3972 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
3973 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
3974
3975 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
3976 for (i = 0; i < fw_size; i++)
3977 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
3978 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
3979
3980 return 0;
3981}
3982
3983 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
3984 {
3985 int r;
3986
3987 gfx_v8_0_rlc_stop(adev);
3988
3989 /* disable CG */
3990 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
3991 if (adev->asic_type == CHIP_POLARIS11 ||
3992 adev->asic_type == CHIP_POLARIS10)
3993 		WREG32(mmRLC_CGCG_CGLS_CTRL_3D, 0);
3994
3995 /* disable PG */
3996 WREG32(mmRLC_PG_CNTL, 0);
3997
3998 gfx_v8_0_rlc_reset(adev);
3999
4000 gfx_v8_0_init_pg(adev);
4001
4002 	if (!adev->pp_enabled) {
4003 if (!adev->firmware.smu_load) {
4004 /* legacy rlc firmware loading */
4005 r = gfx_v8_0_rlc_load_microcode(adev);
4006 if (r)
4007 return r;
4008 } else {
4009 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4010 AMDGPU_UCODE_ID_RLC_G);
4011 if (r)
4012 return -EINVAL;
4013 }
4014 }
4015
4016 gfx_v8_0_rlc_start(adev);
4017
4018 return 0;
4019}
4020
4021 static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
4022 {
4023 int i;
4024 u32 tmp = RREG32(mmCP_ME_CNTL);
4025
4026 if (enable) {
4027 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
4028 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
4029 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
4030 } else {
4031 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
4032 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
4033 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
4034 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4035 adev->gfx.gfx_ring[i].ready = false;
4036 }
4037 WREG32(mmCP_ME_CNTL, tmp);
4038 udelay(50);
4039}
4040
4041 static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
4042 {
4043 const struct gfx_firmware_header_v1_0 *pfp_hdr;
4044 const struct gfx_firmware_header_v1_0 *ce_hdr;
4045 const struct gfx_firmware_header_v1_0 *me_hdr;
4046 const __le32 *fw_data;
4047 unsigned i, fw_size;
4048
4049 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
4050 return -EINVAL;
4051
4052 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
4053 adev->gfx.pfp_fw->data;
4054 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
4055 adev->gfx.ce_fw->data;
4056 me_hdr = (const struct gfx_firmware_header_v1_0 *)
4057 adev->gfx.me_fw->data;
4058
4059 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
4060 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
4061 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
4062
4063 gfx_v8_0_cp_gfx_enable(adev, false);
4064
4065 /* PFP */
4066 fw_data = (const __le32 *)
4067 (adev->gfx.pfp_fw->data +
4068 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4069 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4070 WREG32(mmCP_PFP_UCODE_ADDR, 0);
4071 for (i = 0; i < fw_size; i++)
4072 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4073 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
4074
4075 /* CE */
4076 fw_data = (const __le32 *)
4077 (adev->gfx.ce_fw->data +
4078 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4079 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4080 WREG32(mmCP_CE_UCODE_ADDR, 0);
4081 for (i = 0; i < fw_size; i++)
4082 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4083 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
4084
4085 /* ME */
4086 fw_data = (const __le32 *)
4087 (adev->gfx.me_fw->data +
4088 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4089 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4090 WREG32(mmCP_ME_RAM_WADDR, 0);
4091 for (i = 0; i < fw_size; i++)
4092 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4093 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
4094
4095 return 0;
4096}
4097
4098 static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
4099 {
4100 u32 count = 0;
4101 const struct cs_section_def *sect = NULL;
4102 const struct cs_extent_def *ext = NULL;
4103
4104 /* begin clear state */
4105 count += 2;
4106 /* context control state */
4107 count += 3;
4108
4109 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4110 for (ext = sect->section; ext->extent != NULL; ++ext) {
4111 if (sect->id == SECT_CONTEXT)
4112 count += 2 + ext->reg_count;
4113 else
4114 return 0;
4115 }
4116 }
4117 /* pa_sc_raster_config/pa_sc_raster_config1 */
4118 count += 4;
4119 /* end clear state */
4120 count += 2;
4121 /* clear state */
4122 count += 2;
4123
4124 return count;
4125}
4126
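/* Prime the gfx ring with the clear state preamble: context control, the
 * vi_cs_data context register extents, the per-ASIC PA_SC_RASTER_CONFIG
 * values and the CE partition bases.
 */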
4127 static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
4128 {
4129 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
4130 const struct cs_section_def *sect = NULL;
4131 const struct cs_extent_def *ext = NULL;
4132 int r, i;
4133
4134 /* init the CP */
4135 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
4136 WREG32(mmCP_ENDIAN_SWAP, 0);
4137 WREG32(mmCP_DEVICE_ID, 1);
4138
4139 gfx_v8_0_cp_gfx_enable(adev, true);
4140
4141 	r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
4142 if (r) {
4143 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
4144 return r;
4145 }
4146
4147 /* clear state buffer */
4148 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4149 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4150
4151 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4152 amdgpu_ring_write(ring, 0x80000000);
4153 amdgpu_ring_write(ring, 0x80000000);
4154
4155 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
4156 for (ext = sect->section; ext->extent != NULL; ++ext) {
4157 if (sect->id == SECT_CONTEXT) {
4158 amdgpu_ring_write(ring,
4159 PACKET3(PACKET3_SET_CONTEXT_REG,
4160 ext->reg_count));
4161 amdgpu_ring_write(ring,
4162 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
4163 for (i = 0; i < ext->reg_count; i++)
4164 amdgpu_ring_write(ring, ext->extent[i]);
4165 }
4166 }
4167 }
4168
4169 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4170 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
4171 switch (adev->asic_type) {
4172 case CHIP_TONGA:
4173 	case CHIP_POLARIS10:
4174 amdgpu_ring_write(ring, 0x16000012);
4175 amdgpu_ring_write(ring, 0x0000002A);
4176 break;
4177 	case CHIP_POLARIS11:
4178 amdgpu_ring_write(ring, 0x16000012);
4179 amdgpu_ring_write(ring, 0x00000000);
4180 break;
4181 case CHIP_FIJI:
4182 amdgpu_ring_write(ring, 0x3a00161a);
4183 amdgpu_ring_write(ring, 0x0000002e);
4184 break;
4185 case CHIP_CARRIZO:
4186 amdgpu_ring_write(ring, 0x00000002);
4187 amdgpu_ring_write(ring, 0x00000000);
4188 break;
4189 case CHIP_TOPAZ:
4190 amdgpu_ring_write(ring, adev->gfx.config.num_rbs == 1 ?
4191 0x00000000 : 0x00000002);
4192 amdgpu_ring_write(ring, 0x00000000);
4193 break;
4194 case CHIP_STONEY:
4195 amdgpu_ring_write(ring, 0x00000000);
4196 amdgpu_ring_write(ring, 0x00000000);
4197 break;
4198 default:
4199 BUG();
4200 }
4201
4202 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4203 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4204
4205 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4206 amdgpu_ring_write(ring, 0);
4207
4208 /* init the CE partitions */
4209 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4210 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4211 amdgpu_ring_write(ring, 0x8000);
4212 amdgpu_ring_write(ring, 0x8000);
4213
4214 	amdgpu_ring_commit(ring);
4215
4216 return 0;
4217}
4218
4219 static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
4220 {
4221 struct amdgpu_ring *ring;
4222 u32 tmp;
4223 u32 rb_bufsz;
4224 u64 rb_addr, rptr_addr;
4225 int r;
4226
4227 /* Set the write pointer delay */
4228 WREG32(mmCP_RB_WPTR_DELAY, 0);
4229
4230 /* set the RB to use vmid 0 */
4231 WREG32(mmCP_RB_VMID, 0);
4232
4233 /* Set ring buffer size */
4234 ring = &adev->gfx.gfx_ring[0];
4235 rb_bufsz = order_base_2(ring->ring_size / 8);
4236 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
4237 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
4238 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
4239 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
4240#ifdef __BIG_ENDIAN
4241 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
4242#endif
4243 WREG32(mmCP_RB0_CNTL, tmp);
4244
4245 /* Initialize the ring buffer's read and write pointers */
4246 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
4247 ring->wptr = 0;
4248 WREG32(mmCP_RB0_WPTR, ring->wptr);
4249
4250 	/* set the wb address whether it's enabled or not */
4251 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4252 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
4253 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
4254
4255 mdelay(1);
4256 WREG32(mmCP_RB0_CNTL, tmp);
4257
4258 rb_addr = ring->gpu_addr >> 8;
4259 WREG32(mmCP_RB0_BASE, rb_addr);
4260 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
4261
4262 /* no gfx doorbells on iceland */
4263 if (adev->asic_type != CHIP_TOPAZ) {
4264 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
4265 if (ring->use_doorbell) {
4266 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4267 DOORBELL_OFFSET, ring->doorbell_index);
4268 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4269 DOORBELL_HIT, 0);
4270 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4271 DOORBELL_EN, 1);
4272 } else {
4273 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
4274 DOORBELL_EN, 0);
4275 }
4276 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
4277
4278 if (adev->asic_type == CHIP_TONGA) {
4279 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
4280 DOORBELL_RANGE_LOWER,
4281 AMDGPU_DOORBELL_GFX_RING0);
4282 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
4283
4284 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
4285 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
4286 }
4287
4288 }
4289
4290 /* start the ring */
4291 gfx_v8_0_cp_gfx_start(adev);
4292 ring->ready = true;
4293 r = amdgpu_ring_test_ring(ring);
4294 if (r) {
4295 ring->ready = false;
4296 return r;
4297 }
4298
4299 return 0;
4300}
4301
4302static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
4303{
4304 int i;
4305
4306 if (enable) {
4307 WREG32(mmCP_MEC_CNTL, 0);
4308 } else {
4309 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
4310 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4311 adev->gfx.compute_ring[i].ready = false;
4312 }
4313 udelay(50);
4314}
4315
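/*
 * Load the MEC1 (and, if present, MEC2) compute microcode into the CP via
 * MMIO. The MEC is halted first and re-enabled later from the resume path.
 */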
4316static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
4317{
4318 const struct gfx_firmware_header_v1_0 *mec_hdr;
4319 const __le32 *fw_data;
4320 unsigned i, fw_size;
4321
4322 if (!adev->gfx.mec_fw)
4323 return -EINVAL;
4324
4325 gfx_v8_0_cp_compute_enable(adev, false);
4326
4327 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
4328 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
4329
4330 fw_data = (const __le32 *)
4331 (adev->gfx.mec_fw->data +
4332 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4333 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4334
4335 /* MEC1 */
4336 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
4337 for (i = 0; i < fw_size; i++)
4338 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
4339 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
4340
4341 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
4342 if (adev->gfx.mec2_fw) {
4343 const struct gfx_firmware_header_v1_0 *mec2_hdr;
4344
4345 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
4346 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
4347
4348 fw_data = (const __le32 *)
4349 (adev->gfx.mec2_fw->data +
4350 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4351 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4352
4353 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
4354 for (i = 0; i < fw_size; i++)
4355 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
4356 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
4357 }
4358
4359 return 0;
4360}
4361
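/*
 * Memory queue descriptor (MQD) layout for VI compute queues. The driver
 * fills this structure and programs its GPU address into CP_MQD_BASE_ADDR
 * in gfx_v8_0_cp_compute_resume() below.
 */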
4362struct vi_mqd {
4363 uint32_t header; /* ordinal0 */
4364 uint32_t compute_dispatch_initiator; /* ordinal1 */
4365 uint32_t compute_dim_x; /* ordinal2 */
4366 uint32_t compute_dim_y; /* ordinal3 */
4367 uint32_t compute_dim_z; /* ordinal4 */
4368 uint32_t compute_start_x; /* ordinal5 */
4369 uint32_t compute_start_y; /* ordinal6 */
4370 uint32_t compute_start_z; /* ordinal7 */
4371 uint32_t compute_num_thread_x; /* ordinal8 */
4372 uint32_t compute_num_thread_y; /* ordinal9 */
4373 uint32_t compute_num_thread_z; /* ordinal10 */
4374 uint32_t compute_pipelinestat_enable; /* ordinal11 */
4375 uint32_t compute_perfcount_enable; /* ordinal12 */
4376 uint32_t compute_pgm_lo; /* ordinal13 */
4377 uint32_t compute_pgm_hi; /* ordinal14 */
4378 uint32_t compute_tba_lo; /* ordinal15 */
4379 uint32_t compute_tba_hi; /* ordinal16 */
4380 uint32_t compute_tma_lo; /* ordinal17 */
4381 uint32_t compute_tma_hi; /* ordinal18 */
4382 uint32_t compute_pgm_rsrc1; /* ordinal19 */
4383 uint32_t compute_pgm_rsrc2; /* ordinal20 */
4384 uint32_t compute_vmid; /* ordinal21 */
4385 uint32_t compute_resource_limits; /* ordinal22 */
4386 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
4387 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
4388 uint32_t compute_tmpring_size; /* ordinal25 */
4389 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
4390 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
4391 uint32_t compute_restart_x; /* ordinal28 */
4392 uint32_t compute_restart_y; /* ordinal29 */
4393 uint32_t compute_restart_z; /* ordinal30 */
4394 uint32_t compute_thread_trace_enable; /* ordinal31 */
4395 uint32_t compute_misc_reserved; /* ordinal32 */
4396 uint32_t compute_dispatch_id; /* ordinal33 */
4397 uint32_t compute_threadgroup_id; /* ordinal34 */
4398 uint32_t compute_relaunch; /* ordinal35 */
4399 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
4400 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
4401 uint32_t compute_wave_restore_control; /* ordinal38 */
4402 uint32_t reserved9; /* ordinal39 */
4403 uint32_t reserved10; /* ordinal40 */
4404 uint32_t reserved11; /* ordinal41 */
4405 uint32_t reserved12; /* ordinal42 */
4406 uint32_t reserved13; /* ordinal43 */
4407 uint32_t reserved14; /* ordinal44 */
4408 uint32_t reserved15; /* ordinal45 */
4409 uint32_t reserved16; /* ordinal46 */
4410 uint32_t reserved17; /* ordinal47 */
4411 uint32_t reserved18; /* ordinal48 */
4412 uint32_t reserved19; /* ordinal49 */
4413 uint32_t reserved20; /* ordinal50 */
4414 uint32_t reserved21; /* ordinal51 */
4415 uint32_t reserved22; /* ordinal52 */
4416 uint32_t reserved23; /* ordinal53 */
4417 uint32_t reserved24; /* ordinal54 */
4418 uint32_t reserved25; /* ordinal55 */
4419 uint32_t reserved26; /* ordinal56 */
4420 uint32_t reserved27; /* ordinal57 */
4421 uint32_t reserved28; /* ordinal58 */
4422 uint32_t reserved29; /* ordinal59 */
4423 uint32_t reserved30; /* ordinal60 */
4424 uint32_t reserved31; /* ordinal61 */
4425 uint32_t reserved32; /* ordinal62 */
4426 uint32_t reserved33; /* ordinal63 */
4427 uint32_t reserved34; /* ordinal64 */
4428 uint32_t compute_user_data_0; /* ordinal65 */
4429 uint32_t compute_user_data_1; /* ordinal66 */
4430 uint32_t compute_user_data_2; /* ordinal67 */
4431 uint32_t compute_user_data_3; /* ordinal68 */
4432 uint32_t compute_user_data_4; /* ordinal69 */
4433 uint32_t compute_user_data_5; /* ordinal70 */
4434 uint32_t compute_user_data_6; /* ordinal71 */
4435 uint32_t compute_user_data_7; /* ordinal72 */
4436 uint32_t compute_user_data_8; /* ordinal73 */
4437 uint32_t compute_user_data_9; /* ordinal74 */
4438 uint32_t compute_user_data_10; /* ordinal75 */
4439 uint32_t compute_user_data_11; /* ordinal76 */
4440 uint32_t compute_user_data_12; /* ordinal77 */
4441 uint32_t compute_user_data_13; /* ordinal78 */
4442 uint32_t compute_user_data_14; /* ordinal79 */
4443 uint32_t compute_user_data_15; /* ordinal80 */
4444 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
4445 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
4446 uint32_t reserved35; /* ordinal83 */
4447 uint32_t reserved36; /* ordinal84 */
4448 uint32_t reserved37; /* ordinal85 */
4449 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
4450 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
4451 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
4452 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
4453 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
4454 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
4455 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
4456 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
4457 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
4458 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
4459 uint32_t reserved38; /* ordinal96 */
4460 uint32_t reserved39; /* ordinal97 */
4461 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
4462 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
4463 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
4464 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
4465 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
4466 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
4467 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
4468 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
4469 uint32_t reserved40; /* ordinal106 */
4470 uint32_t reserved41; /* ordinal107 */
4471 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
4472 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
4473 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
4474 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
4475 uint32_t reserved42; /* ordinal112 */
4476 uint32_t reserved43; /* ordinal113 */
4477 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
4478 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
4479 uint32_t cp_packet_id_lo; /* ordinal116 */
4480 uint32_t cp_packet_id_hi; /* ordinal117 */
4481 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
4482 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
4483 uint32_t gds_save_base_addr_lo; /* ordinal120 */
4484 uint32_t gds_save_base_addr_hi; /* ordinal121 */
4485 uint32_t gds_save_mask_lo; /* ordinal122 */
4486 uint32_t gds_save_mask_hi; /* ordinal123 */
4487 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
4488 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
4489 uint32_t reserved44; /* ordinal126 */
4490 uint32_t reserved45; /* ordinal127 */
4491 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
4492 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
4493 uint32_t cp_hqd_active; /* ordinal130 */
4494 uint32_t cp_hqd_vmid; /* ordinal131 */
4495 uint32_t cp_hqd_persistent_state; /* ordinal132 */
4496 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
4497 uint32_t cp_hqd_queue_priority; /* ordinal134 */
4498 uint32_t cp_hqd_quantum; /* ordinal135 */
4499 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
4500 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
4501 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
4502 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
4503 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
4504 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
4505 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
4506 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
4507 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
4508 uint32_t cp_hqd_pq_control; /* ordinal145 */
4509 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
4510 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
4511 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
4512 uint32_t cp_hqd_ib_control; /* ordinal149 */
4513 uint32_t cp_hqd_iq_timer; /* ordinal150 */
4514 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
4515 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
4516 uint32_t cp_hqd_dma_offload; /* ordinal153 */
4517 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
4518 uint32_t cp_hqd_msg_type; /* ordinal155 */
4519 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
4520 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
4521 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
4522 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
4523 uint32_t cp_hqd_hq_status0; /* ordinal160 */
4524 uint32_t cp_hqd_hq_control0; /* ordinal161 */
4525 uint32_t cp_mqd_control; /* ordinal162 */
4526 uint32_t cp_hqd_hq_status1; /* ordinal163 */
4527 uint32_t cp_hqd_hq_control1; /* ordinal164 */
4528 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
4529 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
4530 uint32_t cp_hqd_eop_control; /* ordinal167 */
4531 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
4532 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
4533 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
4534 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
4535 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
4536 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
4537 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
4538 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
4539 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
4540 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
4541 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
4542 uint32_t cp_hqd_error; /* ordinal179 */
4543 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
4544 uint32_t cp_hqd_eop_dones; /* ordinal181 */
4545 uint32_t reserved46; /* ordinal182 */
4546 uint32_t reserved47; /* ordinal183 */
4547 uint32_t reserved48; /* ordinal184 */
4548 uint32_t reserved49; /* ordinal185 */
4549 uint32_t reserved50; /* ordinal186 */
4550 uint32_t reserved51; /* ordinal187 */
4551 uint32_t reserved52; /* ordinal188 */
4552 uint32_t reserved53; /* ordinal189 */
4553 uint32_t reserved54; /* ordinal190 */
4554 uint32_t reserved55; /* ordinal191 */
4555 uint32_t iqtimer_pkt_header; /* ordinal192 */
4556 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
4557 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
4558 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
4559 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
4560 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
4561 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
4562 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
4563 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
4564 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
4565 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
4566 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
4567 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
4568 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
4569 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
4570 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
4571 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
4572 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
4573 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
4574 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
4575 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
4576 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
4577 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
4578 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
4579 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
4580 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
4581 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
4582 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
4583 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
4584 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
4585 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
4586 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
4587 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
4588 uint32_t reserved56; /* ordinal225 */
4589 uint32_t reserved57; /* ordinal226 */
4590 uint32_t reserved58; /* ordinal227 */
4591 uint32_t set_resources_header; /* ordinal228 */
4592 uint32_t set_resources_dw1; /* ordinal229 */
4593 uint32_t set_resources_dw2; /* ordinal230 */
4594 uint32_t set_resources_dw3; /* ordinal231 */
4595 uint32_t set_resources_dw4; /* ordinal232 */
4596 uint32_t set_resources_dw5; /* ordinal233 */
4597 uint32_t set_resources_dw6; /* ordinal234 */
4598 uint32_t set_resources_dw7; /* ordinal235 */
4599 uint32_t reserved59; /* ordinal236 */
4600 uint32_t reserved60; /* ordinal237 */
4601 uint32_t reserved61; /* ordinal238 */
4602 uint32_t reserved62; /* ordinal239 */
4603 uint32_t reserved63; /* ordinal240 */
4604 uint32_t reserved64; /* ordinal241 */
4605 uint32_t reserved65; /* ordinal242 */
4606 uint32_t reserved66; /* ordinal243 */
4607 uint32_t reserved67; /* ordinal244 */
4608 uint32_t reserved68; /* ordinal245 */
4609 uint32_t reserved69; /* ordinal246 */
4610 uint32_t reserved70; /* ordinal247 */
4611 uint32_t reserved71; /* ordinal248 */
4612 uint32_t reserved72; /* ordinal249 */
4613 uint32_t reserved73; /* ordinal250 */
4614 uint32_t reserved74; /* ordinal251 */
4615 uint32_t reserved75; /* ordinal252 */
4616 uint32_t reserved76; /* ordinal253 */
4617 uint32_t reserved77; /* ordinal254 */
4618 uint32_t reserved78; /* ordinal255 */
4619
4620 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
4621};
4622
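/* Tear down the per-ring MQD buffer objects created by the compute resume path. */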
4623static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4624{
4625 int i, r;
4626
4627 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4628 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4629
4630 if (ring->mqd_obj) {
4631 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4632 if (unlikely(r != 0))
4633 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
4634
4635 amdgpu_bo_unpin(ring->mqd_obj);
4636 amdgpu_bo_unreserve(ring->mqd_obj);
4637
4638 amdgpu_bo_unref(&ring->mqd_obj);
4639 ring->mqd_obj = NULL;
4640 }
4641 }
4642}
4643
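/*
 * Initialize the compute pipes (EOP buffers) and one MQD/HQD per compute
 * ring, enable the MEC and run a ring test on every compute ring.
 */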
4644static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4645{
4646 int r, i, j;
4647 u32 tmp;
4648 bool use_doorbell = true;
4649 u64 hqd_gpu_addr;
4650 u64 mqd_gpu_addr;
4651 u64 eop_gpu_addr;
4652 u64 wb_gpu_addr;
4653 u32 *buf;
4654 struct vi_mqd *mqd;
4655
4656 /* init the pipes */
4657 mutex_lock(&adev->srbm_mutex);
4658 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
4659 int me = (i < 4) ? 1 : 2;
4660 int pipe = (i < 4) ? i : (i - 4);
4661
4662 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
4663 eop_gpu_addr >>= 8;
4664
4665 vi_srbm_select(adev, me, pipe, 0, 0);
4666
4667 /* write the EOP addr */
4668 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
4669 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
4670
4671 /* set the VMID assigned */
4672 WREG32(mmCP_HQD_VMID, 0);
4673
4674 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4675 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
4676 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
4677 (order_base_2(MEC_HPD_SIZE / 4) - 1));
4678 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
4679 }
4680 vi_srbm_select(adev, 0, 0, 0, 0);
4681 mutex_unlock(&adev->srbm_mutex);
4682
 4683	/* init the compute queues */
4684 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4685 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4686
4687 if (ring->mqd_obj == NULL) {
4688 r = amdgpu_bo_create(adev,
4689 sizeof(struct vi_mqd),
4690 PAGE_SIZE, true,
4691 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
72d7668b 4692 NULL, &ring->mqd_obj);
4693 if (r) {
4694 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
4695 return r;
4696 }
4697 }
4698
4699 r = amdgpu_bo_reserve(ring->mqd_obj, false);
4700 if (unlikely(r != 0)) {
4701 gfx_v8_0_cp_compute_fini(adev);
4702 return r;
4703 }
4704 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
4705 &mqd_gpu_addr);
4706 if (r) {
4707 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
4708 gfx_v8_0_cp_compute_fini(adev);
4709 return r;
4710 }
4711 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
4712 if (r) {
4713 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
4714 gfx_v8_0_cp_compute_fini(adev);
4715 return r;
4716 }
4717
4718 /* init the mqd struct */
4719 memset(buf, 0, sizeof(struct vi_mqd));
4720
4721 mqd = (struct vi_mqd *)buf;
4722 mqd->header = 0xC0310800;
4723 mqd->compute_pipelinestat_enable = 0x00000001;
4724 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
4725 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
4726 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4727 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4728 mqd->compute_misc_reserved = 0x00000003;
4729
4730 mutex_lock(&adev->srbm_mutex);
4731 vi_srbm_select(adev, ring->me,
4732 ring->pipe,
4733 ring->queue, 0);
4734
4735 /* disable wptr polling */
4736 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
4737 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
4738 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
4739
4740 mqd->cp_hqd_eop_base_addr_lo =
4741 RREG32(mmCP_HQD_EOP_BASE_ADDR);
4742 mqd->cp_hqd_eop_base_addr_hi =
4743 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
4744
4745 /* enable doorbell? */
4746 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4747 if (use_doorbell) {
4748 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4749 } else {
4750 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
4751 }
4752 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
4753 mqd->cp_hqd_pq_doorbell_control = tmp;
4754
4755 /* disable the queue if it's active */
4756 mqd->cp_hqd_dequeue_request = 0;
4757 mqd->cp_hqd_pq_rptr = 0;
 4758	mqd->cp_hqd_pq_wptr = 0;
4759 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
4760 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
4761 for (j = 0; j < adev->usec_timeout; j++) {
4762 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
4763 break;
4764 udelay(1);
4765 }
4766 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
4767 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
4768 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4769 }
4770
4771 /* set the pointer to the MQD */
4772 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
4773 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4774 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
4775 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
4776
4777 /* set MQD vmid to 0 */
4778 tmp = RREG32(mmCP_MQD_CONTROL);
4779 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
4780 WREG32(mmCP_MQD_CONTROL, tmp);
4781 mqd->cp_mqd_control = tmp;
4782
 4783	/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
4784 hqd_gpu_addr = ring->gpu_addr >> 8;
4785 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
4786 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4787 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
4788 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
4789
4790 /* set up the HQD, this is similar to CP_RB0_CNTL */
4791 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
4792 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
4793 (order_base_2(ring->ring_size / 4) - 1));
4794 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
4795 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
4796#ifdef __BIG_ENDIAN
4797 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
4798#endif
4799 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
4800 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
4801 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
4802 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
4803 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
4804 mqd->cp_hqd_pq_control = tmp;
4805
 4806	/* set the wb address whether it's enabled or not */
4807 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
4808 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
4809 mqd->cp_hqd_pq_rptr_report_addr_hi =
4810 upper_32_bits(wb_gpu_addr) & 0xffff;
4811 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
4812 mqd->cp_hqd_pq_rptr_report_addr_lo);
4813 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4814 mqd->cp_hqd_pq_rptr_report_addr_hi);
4815
4816 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
4817 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4818 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4819 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4820 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
4821 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
4822 mqd->cp_hqd_pq_wptr_poll_addr_hi);
4823
4824 /* enable the doorbell if requested */
4825 if (use_doorbell) {
bddf8026 4826 if ((adev->asic_type == CHIP_CARRIZO) ||
e3c7656c 4827 (adev->asic_type == CHIP_FIJI) ||
68182d90 4828 (adev->asic_type == CHIP_STONEY) ||
4829 (adev->asic_type == CHIP_POLARIS11) ||
4830 (adev->asic_type == CHIP_POLARIS10)) {
4831 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
4832 AMDGPU_DOORBELL_KIQ << 2);
4833 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
b8826b0c 4834 AMDGPU_DOORBELL_MEC_RING7 << 2);
4835 }
4836 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
4837 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
4838 DOORBELL_OFFSET, ring->doorbell_index);
4839 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
4840 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
4841 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
4842 mqd->cp_hqd_pq_doorbell_control = tmp;
4843
4844 } else {
4845 mqd->cp_hqd_pq_doorbell_control = 0;
4846 }
4847 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
4848 mqd->cp_hqd_pq_doorbell_control);
4849
4850 /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4851 ring->wptr = 0;
4852 mqd->cp_hqd_pq_wptr = ring->wptr;
4853 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
4854 mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);
4855
4856 /* set the vmid for the queue */
4857 mqd->cp_hqd_vmid = 0;
4858 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
4859
4860 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
4861 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
4862 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
4863 mqd->cp_hqd_persistent_state = tmp;
68182d90 4864 if (adev->asic_type == CHIP_STONEY ||
4865 adev->asic_type == CHIP_POLARIS11 ||
4866 adev->asic_type == CHIP_POLARIS10) {
4867 tmp = RREG32(mmCP_ME1_PIPE3_INT_CNTL);
4868 tmp = REG_SET_FIELD(tmp, CP_ME1_PIPE3_INT_CNTL, GENERIC2_INT_ENABLE, 1);
4869 WREG32(mmCP_ME1_PIPE3_INT_CNTL, tmp);
4870 }
4871
4872 /* activate the queue */
4873 mqd->cp_hqd_active = 1;
4874 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
4875
4876 vi_srbm_select(adev, 0, 0, 0, 0);
4877 mutex_unlock(&adev->srbm_mutex);
4878
4879 amdgpu_bo_kunmap(ring->mqd_obj);
4880 amdgpu_bo_unreserve(ring->mqd_obj);
4881 }
4882
4883 if (use_doorbell) {
4884 tmp = RREG32(mmCP_PQ_STATUS);
4885 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
4886 WREG32(mmCP_PQ_STATUS, tmp);
4887 }
4888
6e9821b2 4889 gfx_v8_0_cp_compute_enable(adev, true);
4890
4891 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4892 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
4893
4894 ring->ready = true;
4895 r = amdgpu_ring_test_ring(ring);
4896 if (r)
4897 ring->ready = false;
4898 }
4899
4900 return 0;
4901}
4902
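/*
 * Bring up the whole CP: load the CP/MEC microcode (or verify that the SMU
 * already loaded it), then resume the gfx and compute rings.
 */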
4903static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4904{
4905 int r;
4906
e3c7656c 4907 if (!(adev->flags & AMD_IS_APU))
4908 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
4909
e61710c5 4910 if (!adev->pp_enabled) {
4911 if (!adev->firmware.smu_load) {
4912 /* legacy firmware loading */
4913 r = gfx_v8_0_cp_gfx_load_microcode(adev);
4914 if (r)
4915 return r;
aaa36a97 4916
4917 r = gfx_v8_0_cp_compute_load_microcode(adev);
4918 if (r)
4919 return r;
4920 } else {
4921 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4922 AMDGPU_UCODE_ID_CP_CE);
4923 if (r)
4924 return -EINVAL;
4925
4926 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4927 AMDGPU_UCODE_ID_CP_PFP);
4928 if (r)
4929 return -EINVAL;
4930
4931 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4932 AMDGPU_UCODE_ID_CP_ME);
4933 if (r)
4934 return -EINVAL;
4935
4936 if (adev->asic_type == CHIP_TOPAZ) {
4937 r = gfx_v8_0_cp_compute_load_microcode(adev);
4938 if (r)
4939 return r;
4940 } else {
4941 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
4942 AMDGPU_UCODE_ID_CP_MEC1);
4943 if (r)
4944 return -EINVAL;
4945 }
ba5c2a87 4946 }
4947 }
4948
4949 r = gfx_v8_0_cp_gfx_resume(adev);
4950 if (r)
4951 return r;
4952
4953 r = gfx_v8_0_cp_compute_resume(adev);
4954 if (r)
4955 return r;
4956
4957 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
4958
4959 return 0;
4960}
4961
4962static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
4963{
4964 gfx_v8_0_cp_gfx_enable(adev, enable);
4965 gfx_v8_0_cp_compute_enable(adev, enable);
4966}
4967
5fc3aeeb 4968static int gfx_v8_0_hw_init(void *handle)
4969{
4970 int r;
5fc3aeeb 4971 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4972
4973 gfx_v8_0_init_golden_registers(adev);
4974
4975 gfx_v8_0_gpu_init(adev);
4976
4977 r = gfx_v8_0_rlc_resume(adev);
4978 if (r)
4979 return r;
4980
4981 r = gfx_v8_0_cp_resume(adev);
4982 if (r)
4983 return r;
4984
4985 return r;
4986}
4987
5fc3aeeb 4988static int gfx_v8_0_hw_fini(void *handle)
aaa36a97 4989{
5fc3aeeb 4990 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
4991
4992 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4993 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4994 gfx_v8_0_cp_enable(adev, false);
4995 gfx_v8_0_rlc_stop(adev);
4996 gfx_v8_0_cp_compute_fini(adev);
4997
4998 amdgpu_set_powergating_state(adev,
4999 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_UNGATE);
5000
5001 return 0;
5002}
5003
5fc3aeeb 5004static int gfx_v8_0_suspend(void *handle)
aaa36a97 5005{
5fc3aeeb 5006 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5007
5008 return gfx_v8_0_hw_fini(adev);
5009}
5010
5fc3aeeb 5011static int gfx_v8_0_resume(void *handle)
aaa36a97 5012{
5fc3aeeb 5013 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5014
5015 return gfx_v8_0_hw_init(adev);
5016}
5017
5fc3aeeb 5018static bool gfx_v8_0_is_idle(void *handle)
aaa36a97 5019{
5fc3aeeb 5020 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5021
5022 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
5023 return false;
5024 else
5025 return true;
5026}
5027
5fc3aeeb 5028static int gfx_v8_0_wait_for_idle(void *handle)
5029{
5030 unsigned i;
5031 u32 tmp;
5fc3aeeb 5032 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5033
5034 for (i = 0; i < adev->usec_timeout; i++) {
 5035	/* read GRBM_STATUS */
5036 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
5037
5038 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
5039 return 0;
5040 udelay(1);
5041 }
5042 return -ETIMEDOUT;
5043}
5044
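/*
 * Inspect GRBM/SRBM status and, if any GFX/CP/RLC block is stuck busy, stall
 * the GFX engine and pulse the corresponding GRBM/SRBM soft reset bits.
 */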
5fc3aeeb 5045static int gfx_v8_0_soft_reset(void *handle)
5046{
5047 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5048 u32 tmp;
5fc3aeeb 5049 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5050
5051 /* GRBM_STATUS */
5052 tmp = RREG32(mmGRBM_STATUS);
5053 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
5054 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
5055 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
5056 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
5057 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
5058 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
5059 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5060 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5061 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5062 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
5063 }
5064
5065 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
5066 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5067 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
5068 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5069 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5070 }
5071
5072 /* GRBM_STATUS2 */
5073 tmp = RREG32(mmGRBM_STATUS2);
5074 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
5075 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
5076 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
5077
5078 /* SRBM_STATUS */
5079 tmp = RREG32(mmSRBM_STATUS);
5080 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
5081 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
5082 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
5083
5084 if (grbm_soft_reset || srbm_soft_reset) {
5085 /* stop the rlc */
5086 gfx_v8_0_rlc_stop(adev);
5087
5088 /* Disable GFX parsing/prefetching */
5089 gfx_v8_0_cp_gfx_enable(adev, false);
5090
5091 /* Disable MEC parsing/prefetching */
5092 gfx_v8_0_cp_compute_enable(adev, false);
5093
5094 if (grbm_soft_reset || srbm_soft_reset) {
5095 tmp = RREG32(mmGMCON_DEBUG);
5096 tmp = REG_SET_FIELD(tmp,
5097 GMCON_DEBUG, GFX_STALL, 1);
5098 tmp = REG_SET_FIELD(tmp,
5099 GMCON_DEBUG, GFX_CLEAR, 1);
5100 WREG32(mmGMCON_DEBUG, tmp);
5101
5102 udelay(50);
5103 }
5104
5105 if (grbm_soft_reset) {
5106 tmp = RREG32(mmGRBM_SOFT_RESET);
5107 tmp |= grbm_soft_reset;
5108 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5109 WREG32(mmGRBM_SOFT_RESET, tmp);
5110 tmp = RREG32(mmGRBM_SOFT_RESET);
5111
5112 udelay(50);
5113
5114 tmp &= ~grbm_soft_reset;
5115 WREG32(mmGRBM_SOFT_RESET, tmp);
5116 tmp = RREG32(mmGRBM_SOFT_RESET);
5117 }
5118
5119 if (srbm_soft_reset) {
5120 tmp = RREG32(mmSRBM_SOFT_RESET);
5121 tmp |= srbm_soft_reset;
5122 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5123 WREG32(mmSRBM_SOFT_RESET, tmp);
5124 tmp = RREG32(mmSRBM_SOFT_RESET);
5125
5126 udelay(50);
5127
5128 tmp &= ~srbm_soft_reset;
5129 WREG32(mmSRBM_SOFT_RESET, tmp);
5130 tmp = RREG32(mmSRBM_SOFT_RESET);
5131 }
5132
5133 if (grbm_soft_reset || srbm_soft_reset) {
5134 tmp = RREG32(mmGMCON_DEBUG);
5135 tmp = REG_SET_FIELD(tmp,
5136 GMCON_DEBUG, GFX_STALL, 0);
5137 tmp = REG_SET_FIELD(tmp,
5138 GMCON_DEBUG, GFX_CLEAR, 0);
5139 WREG32(mmGMCON_DEBUG, tmp);
5140 }
5141
5142 /* Wait a little for things to settle down */
5143 udelay(50);
5144 }
5145 return 0;
5146}
5147
5148/**
5149 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
5150 *
5151 * @adev: amdgpu_device pointer
5152 *
5153 * Fetches a GPU clock counter snapshot.
5154 * Returns the 64 bit clock counter snapshot.
5155 */
b95e31fd 5156static uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
5157{
5158 uint64_t clock;
5159
5160 mutex_lock(&adev->gfx.gpu_clock_mutex);
5161 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
5162 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
5163 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
5164 mutex_unlock(&adev->gfx.gpu_clock_mutex);
5165 return clock;
5166}
5167
5168static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
5169 uint32_t vmid,
5170 uint32_t gds_base, uint32_t gds_size,
5171 uint32_t gws_base, uint32_t gws_size,
5172 uint32_t oa_base, uint32_t oa_size)
5173{
5174 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
5175 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
5176
5177 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
5178 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
5179
5180 oa_base = oa_base >> AMDGPU_OA_SHIFT;
5181 oa_size = oa_size >> AMDGPU_OA_SHIFT;
5182
5183 /* GDS Base */
5184 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5185 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5186 WRITE_DATA_DST_SEL(0)));
5187 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
5188 amdgpu_ring_write(ring, 0);
5189 amdgpu_ring_write(ring, gds_base);
5190
5191 /* GDS Size */
5192 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5193 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5194 WRITE_DATA_DST_SEL(0)));
5195 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
5196 amdgpu_ring_write(ring, 0);
5197 amdgpu_ring_write(ring, gds_size);
5198
5199 /* GWS */
5200 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5201 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5202 WRITE_DATA_DST_SEL(0)));
5203 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
5204 amdgpu_ring_write(ring, 0);
5205 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
5206
5207 /* OA */
5208 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5209 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5210 WRITE_DATA_DST_SEL(0)));
5211 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
5212 amdgpu_ring_write(ring, 0);
5213 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
5214}
5215
5216static const struct amdgpu_gfx_funcs gfx_v8_0_gfx_funcs = {
5217 .get_gpu_clock_counter = &gfx_v8_0_get_gpu_clock_counter,
05fb7291 5218 .select_se_sh = &gfx_v8_0_select_se_sh,
5219};
5220
5fc3aeeb 5221static int gfx_v8_0_early_init(void *handle)
aaa36a97 5222{
5fc3aeeb 5223 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5224
5225 adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
5226 adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
b95e31fd 5227 adev->gfx.funcs = &gfx_v8_0_gfx_funcs;
5228 gfx_v8_0_set_ring_funcs(adev);
5229 gfx_v8_0_set_irq_funcs(adev);
5230 gfx_v8_0_set_gds_init(adev);
dbff57bc 5231 gfx_v8_0_set_rlc_funcs(adev);
5232
5233 return 0;
5234}
5235
5236static int gfx_v8_0_late_init(void *handle)
5237{
5238 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5239 int r;
5240
5241 r = amdgpu_irq_get(adev, &adev->gfx.priv_reg_irq, 0);
5242 if (r)
5243 return r;
5244
5245 r = amdgpu_irq_get(adev, &adev->gfx.priv_inst_irq, 0);
5246 if (r)
5247 return r;
5248
5249 /* requires IBs so do in late init after IB pool is initialized */
5250 r = gfx_v8_0_do_edc_gpr_workarounds(adev);
5251 if (r)
5252 return r;
5253
5254 amdgpu_set_powergating_state(adev,
5255 AMD_IP_BLOCK_TYPE_GFX, AMD_PG_STATE_GATE);
5256
5257 return 0;
5258}
5259
5260static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *adev,
5261 bool enable)
5262{
5263 uint32_t data, temp;
5264
5265 if (adev->asic_type == CHIP_POLARIS11)
5266 /* Send msg to SMU via Powerplay */
5267 amdgpu_set_powergating_state(adev,
5268 AMD_IP_BLOCK_TYPE_SMC,
5269 enable ?
5270 AMD_PG_STATE_GATE : AMD_PG_STATE_UNGATE);
62a86fc2 5271
5272 temp = data = RREG32(mmRLC_PG_CNTL);
5273 /* Enable static MGPG */
5274 if (enable)
62a86fc2 5275 data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
dad4acc8 5276 else
5277 data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
5278
5279 if (temp != data)
5280 WREG32(mmRLC_PG_CNTL, data);
5281}
5282
5283static void gfx_v8_0_enable_gfx_dynamic_mg_power_gating(struct amdgpu_device *adev,
5284 bool enable)
5285{
5286 uint32_t data, temp;
5287
5288 temp = data = RREG32(mmRLC_PG_CNTL);
5289 /* Enable dynamic MGPG */
5290 if (enable)
62a86fc2 5291 data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
dad4acc8 5292 else
5293 data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
5294
5295 if (temp != data)
5296 WREG32(mmRLC_PG_CNTL, data);
5297}
5298
2cc0c0b5 5299static void polaris11_enable_gfx_quick_mg_power_gating(struct amdgpu_device *adev,
5300 bool enable)
5301{
5302 uint32_t data, temp;
5303
5304 temp = data = RREG32(mmRLC_PG_CNTL);
5305 /* Enable quick PG */
5306 if (enable)
78f73bf0 5307 data |= RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
dad4acc8 5308 else
78f73bf0 5309 data &= ~RLC_PG_CNTL__QUICK_PG_ENABLE_MASK;
62a86fc2 5310
5311 if (temp != data)
5312 WREG32(mmRLC_PG_CNTL, data);
5313}
5314
5315static void cz_enable_gfx_cg_power_gating(struct amdgpu_device *adev,
5316 bool enable)
5317{
5318 u32 data, orig;
5319
5320 orig = data = RREG32(mmRLC_PG_CNTL);
5321
5322 if (enable)
5323 data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5324 else
5325 data &= ~RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
5326
5327 if (orig != data)
5328 WREG32(mmRLC_PG_CNTL, data);
5329}
5330
5331static void cz_enable_gfx_pipeline_power_gating(struct amdgpu_device *adev,
5332 bool enable)
5333{
5334 u32 data, orig;
5335
5336 orig = data = RREG32(mmRLC_PG_CNTL);
5337
5338 if (enable)
5339 data |= RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5340 else
5341 data &= ~RLC_PG_CNTL__GFX_PIPELINE_PG_ENABLE_MASK;
5342
5343 if (orig != data)
5344 WREG32(mmRLC_PG_CNTL, data);
5345
5346 /* Read any GFX register to wake up GFX. */
5347 if (!enable)
5348 data = RREG32(mmDB_RENDER_CONTROL);
5349}
5350
5351static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev,
5352 bool enable)
5353{
5354 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) {
5355 cz_enable_gfx_cg_power_gating(adev, true);
5356 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PIPELINE)
5357 cz_enable_gfx_pipeline_power_gating(adev, true);
5358 } else {
5359 cz_enable_gfx_cg_power_gating(adev, false);
5360 cz_enable_gfx_pipeline_power_gating(adev, false);
5361 }
5362}
5363
5fc3aeeb 5364static int gfx_v8_0_set_powergating_state(void *handle,
5365 enum amd_powergating_state state)
aaa36a97 5366{
62a86fc2 5367 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2c547165 5368 bool enable = (state == AMD_PG_STATE_GATE) ? true : false;
5369
5370 if (!(adev->pg_flags & AMD_PG_SUPPORT_GFX_PG))
5371 return 0;
5372
5373 switch (adev->asic_type) {
5374 case CHIP_CARRIZO:
5375 case CHIP_STONEY:
5376 if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)
5377 cz_update_gfx_cg_power_gating(adev, enable);
5378
5379 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5380 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5381 else
5382 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5383
5384 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5385 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5386 else
5387 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5388 break;
2cc0c0b5 5389 case CHIP_POLARIS11:
5390 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG) && enable)
5391 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, true);
5392 else
5393 gfx_v8_0_enable_gfx_static_mg_power_gating(adev, false);
5394
5395 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG) && enable)
5396 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, true);
5397 else
5398 gfx_v8_0_enable_gfx_dynamic_mg_power_gating(adev, false);
5399
5400 if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_QUICK_MG) && enable)
5401 polaris11_enable_gfx_quick_mg_power_gating(adev, true);
62a86fc2 5402 else
7ba0eb6d 5403 polaris11_enable_gfx_quick_mg_power_gating(adev, false);
5404 break;
5405 default:
5406 break;
5407 }
5408
5409 return 0;
5410}
5411
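/*
 * Broadcast a BPM serdes command to all CUs through RLC_SERDES_WR_CTRL; used
 * by the clock gating update functions below to set or clear override bits.
 */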
79deaaf4 5412static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev,
14698b6c 5413 uint32_t reg_addr, uint32_t cmd)
5414{
5415 uint32_t data;
5416
9559ef5b 5417 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
5418
5419 WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5420 WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5421
5422 data = RREG32(mmRLC_SERDES_WR_CTRL);
5423 if (adev->asic_type == CHIP_STONEY)
5424 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5425 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5426 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5427 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5428 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5429 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5430 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5431 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
6e378858 5432 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
5433 else
5434 data &= ~(RLC_SERDES_WR_CTRL__WRITE_COMMAND_MASK |
5435 RLC_SERDES_WR_CTRL__READ_COMMAND_MASK |
5436 RLC_SERDES_WR_CTRL__P1_SELECT_MASK |
5437 RLC_SERDES_WR_CTRL__P2_SELECT_MASK |
5438 RLC_SERDES_WR_CTRL__RDDATA_RESET_MASK |
5439 RLC_SERDES_WR_CTRL__POWER_DOWN_MASK |
5440 RLC_SERDES_WR_CTRL__POWER_UP_MASK |
5441 RLC_SERDES_WR_CTRL__SHORT_FORMAT_MASK |
5442 RLC_SERDES_WR_CTRL__BPM_DATA_MASK |
5443 RLC_SERDES_WR_CTRL__REG_ADDR_MASK |
5444 RLC_SERDES_WR_CTRL__SRBM_OVERRIDE_MASK);
6e378858 5445 data |= (RLC_SERDES_WR_CTRL__RSVD_BPM_ADDR_MASK |
5446 (cmd << RLC_SERDES_WR_CTRL__BPM_DATA__SHIFT) |
5447 (reg_addr << RLC_SERDES_WR_CTRL__REG_ADDR__SHIFT) |
5448 (0xff << RLC_SERDES_WR_CTRL__BPM_ADDR__SHIFT));
5449
5450 WREG32(mmRLC_SERDES_WR_CTRL, data);
5451}
5452
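/*
 * RLC safe mode handshake: ask the RLC firmware to bring GFX into a safe
 * state before clock/power gating registers are modified, and release it
 * afterwards.
 */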
5453#define MSG_ENTER_RLC_SAFE_MODE 1
5454#define MSG_EXIT_RLC_SAFE_MODE 0
5455
5456#define RLC_GPR_REG2__REQ_MASK 0x00000001
5457#define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001
5458#define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e
5459
5460static void cz_enter_rlc_safe_mode(struct amdgpu_device *adev)
5461{
5462 u32 data = 0;
5463 unsigned i;
5464
5465 data = RREG32(mmRLC_CNTL);
5466 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5467 return;
5468
5469 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5470 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5471 AMD_PG_SUPPORT_GFX_DMG))) {
5472 data |= RLC_GPR_REG2__REQ_MASK;
5473 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5474 data |= (MSG_ENTER_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5475 WREG32(mmRLC_GPR_REG2, data);
5476
5477 for (i = 0; i < adev->usec_timeout; i++) {
5478 if ((RREG32(mmRLC_GPM_STAT) &
5479 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5480 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5481 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5482 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5483 break;
5484 udelay(1);
5485 }
5486
5487 for (i = 0; i < adev->usec_timeout; i++) {
5488 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5489 break;
5490 udelay(1);
5491 }
5492 adev->gfx.rlc.in_safe_mode = true;
5493 }
5494}
5495
5496static void cz_exit_rlc_safe_mode(struct amdgpu_device *adev)
5497{
5498 u32 data;
5499 unsigned i;
5500
5501 data = RREG32(mmRLC_CNTL);
5502 if ((data & RLC_CNTL__RLC_ENABLE_F32_MASK) == 0)
5503 return;
5504
5505 if ((adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) ||
5506 (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_GFX_SMG |
5507 AMD_PG_SUPPORT_GFX_DMG))) {
5508 data |= RLC_GPR_REG2__REQ_MASK;
5509 data &= ~RLC_GPR_REG2__MESSAGE_MASK;
5510 data |= (MSG_EXIT_RLC_SAFE_MODE << RLC_GPR_REG2__MESSAGE__SHIFT);
5511 WREG32(mmRLC_GPR_REG2, data);
5512 adev->gfx.rlc.in_safe_mode = false;
5513 }
5514
5515 for (i = 0; i < adev->usec_timeout; i++) {
5516 if ((RREG32(mmRLC_GPR_REG2) & RLC_GPR_REG2__REQ_MASK) == 0)
5517 break;
5518 udelay(1);
5519 }
5520}
5521
5522static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev)
5523{
5524 u32 data;
5525 unsigned i;
5526
5527 data = RREG32(mmRLC_CNTL);
5528 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5529 return;
5530
5531 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5532 data |= RLC_SAFE_MODE__CMD_MASK;
5533 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5534 data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT);
5535 WREG32(mmRLC_SAFE_MODE, data);
5536
5537 for (i = 0; i < adev->usec_timeout; i++) {
5538 if ((RREG32(mmRLC_GPM_STAT) &
5539 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5540 RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) ==
5541 (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK |
5542 RLC_GPM_STAT__GFX_POWER_STATUS_MASK))
5543 break;
5544 udelay(1);
5545 }
5546
5547 for (i = 0; i < adev->usec_timeout; i++) {
5548 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5549 break;
5550 udelay(1);
5551 }
5552 adev->gfx.rlc.in_safe_mode = true;
5553 }
5554}
5555
5556static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev)
5557{
5558 u32 data = 0;
5559 unsigned i;
5560
5561 data = RREG32(mmRLC_CNTL);
5562 if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK))
5563 return;
5564
5565 if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) {
5566 if (adev->gfx.rlc.in_safe_mode) {
5567 data |= RLC_SAFE_MODE__CMD_MASK;
5568 data &= ~RLC_SAFE_MODE__MESSAGE_MASK;
5569 WREG32(mmRLC_SAFE_MODE, data);
5570 adev->gfx.rlc.in_safe_mode = false;
5571 }
5572 }
5573
5574 for (i = 0; i < adev->usec_timeout; i++) {
5575 if ((RREG32(mmRLC_SAFE_MODE) & RLC_SAFE_MODE__CMD_MASK) == 0)
5576 break;
5577 udelay(1);
5578 }
5579}
5580
5581static void gfx_v8_0_nop_enter_rlc_safe_mode(struct amdgpu_device *adev)
5582{
5583 adev->gfx.rlc.in_safe_mode = true;
5584}
5585
5586static void gfx_v8_0_nop_exit_rlc_safe_mode(struct amdgpu_device *adev)
5587{
5588 adev->gfx.rlc.in_safe_mode = false;
5589}
5590
5591static const struct amdgpu_rlc_funcs cz_rlc_funcs = {
5592 .enter_safe_mode = cz_enter_rlc_safe_mode,
5593 .exit_safe_mode = cz_exit_rlc_safe_mode
5594};
5595
5596static const struct amdgpu_rlc_funcs iceland_rlc_funcs = {
5597 .enter_safe_mode = iceland_enter_rlc_safe_mode,
5598 .exit_safe_mode = iceland_exit_rlc_safe_mode
5599};
5600
5601static const struct amdgpu_rlc_funcs gfx_v8_0_nop_rlc_funcs = {
5602 .enter_safe_mode = gfx_v8_0_nop_enter_rlc_safe_mode,
5603 .exit_safe_mode = gfx_v8_0_nop_exit_rlc_safe_mode
5604};
5605
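/*
 * Enable or disable medium grain clock gating (MGCG), memory light sleep and
 * CGTS for the gfx block; called with the RLC put into safe mode.
 */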
5606static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev,
5607 bool enable)
5608{
5609 uint32_t temp, data;
5610
5611 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5612
6e378858 5613 /* It is disabled by HW by default */
5614 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
5615 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
5616 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_RLC_LS) {
5617 /* 1 - RLC memory Light sleep */
5618 temp = data = RREG32(mmRLC_MEM_SLP_CNTL);
5619 data |= RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5620 if (temp != data)
5621 WREG32(mmRLC_MEM_SLP_CNTL, data);
5622 }
6e378858 5623
5624 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
5625 /* 2 - CP memory Light sleep */
5626 temp = data = RREG32(mmCP_MEM_SLP_CNTL);
5627 data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5628 if (temp != data)
5629 WREG32(mmCP_MEM_SLP_CNTL, data);
5630 }
5631 }
5632
5633 /* 3 - RLC_CGTT_MGCG_OVERRIDE */
5634 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5635 if (adev->flags & AMD_IS_APU)
5636 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5637 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5638 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK);
5639 else
5640 data &= ~(RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5641 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5642 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5643 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5644
5645 if (temp != data)
5646 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5647
5648 /* 4 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5649 gfx_v8_0_wait_for_rlc_serdes(adev);
5650
5651 /* 5 - clear mgcg override */
79deaaf4 5652 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
6e378858 5653
5654 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
5655 /* 6 - Enable CGTS(Tree Shade) MGCG /MGLS */
5656 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5657 data &= ~(CGTS_SM_CTRL_REG__SM_MODE_MASK);
5658 data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
5659 data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
5660 data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
5661 if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
5662 (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
5663 data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
5664 data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
5665 data |= (0x96 << CGTS_SM_CTRL_REG__ON_MONITOR_ADD__SHIFT);
5666 if (temp != data)
5667 WREG32(mmCGTS_SM_CTRL_REG, data);
5668 }
5669 udelay(50);
5670
5671 /* 7 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5672 gfx_v8_0_wait_for_rlc_serdes(adev);
5673 } else {
5674 /* 1 - MGCG_OVERRIDE[0] for CP and MGCG_OVERRIDE[1] for RLC */
5675 temp = data = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5676 data |= (RLC_CGTT_MGCG_OVERRIDE__CPF_MASK |
5677 RLC_CGTT_MGCG_OVERRIDE__RLC_MASK |
5678 RLC_CGTT_MGCG_OVERRIDE__MGCG_MASK |
5679 RLC_CGTT_MGCG_OVERRIDE__GRBM_MASK);
5680 if (temp != data)
5681 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data);
5682
5683 /* 2 - disable MGLS in RLC */
5684 data = RREG32(mmRLC_MEM_SLP_CNTL);
5685 if (data & RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK) {
5686 data &= ~RLC_MEM_SLP_CNTL__RLC_MEM_LS_EN_MASK;
5687 WREG32(mmRLC_MEM_SLP_CNTL, data);
5688 }
5689
5690 /* 3 - disable MGLS in CP */
5691 data = RREG32(mmCP_MEM_SLP_CNTL);
5692 if (data & CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK) {
5693 data &= ~CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
5694 WREG32(mmCP_MEM_SLP_CNTL, data);
5695 }
5696
5697 /* 4 - Disable CGTS(Tree Shade) MGCG and MGLS */
5698 temp = data = RREG32(mmCGTS_SM_CTRL_REG);
5699 data |= (CGTS_SM_CTRL_REG__OVERRIDE_MASK |
5700 CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK);
5701 if (temp != data)
5702 WREG32(mmCGTS_SM_CTRL_REG, data);
5703
5704 /* 5 - wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5705 gfx_v8_0_wait_for_rlc_serdes(adev);
5706
5707 /* 6 - set mgcg override */
79deaaf4 5708 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_MGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5709
5710 udelay(50);
5711
5712 /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5713 gfx_v8_0_wait_for_rlc_serdes(adev);
5714 }
5715
5716 adev->gfx.rlc.funcs->exit_safe_mode(adev);
5717}
5718
5719static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev,
5720 bool enable)
5721{
5722 uint32_t temp, temp1, data, data1;
5723
5724 temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
5725
5726 adev->gfx.rlc.funcs->enter_safe_mode(adev);
5727
14698b6c 5728 if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
5729 /* 1 enable cntx_empty_int_enable/cntx_busy_int_enable/
5730 * Cmp_busy/GFX_Idle interrupts
5731 */
5732 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
5733
5734 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5735 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK;
5736 if (temp1 != data1)
5737 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5738
5739 /* 2 wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5740 gfx_v8_0_wait_for_rlc_serdes(adev);
5741
5742 /* 3 - clear cgcg override */
79deaaf4 5743 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, CLE_BPM_SERDES_CMD);
5744
5745 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5746 gfx_v8_0_wait_for_rlc_serdes(adev);
5747
5748 /* 4 - write cmd to set CGLS */
79deaaf4 5749 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, SET_BPM_SERDES_CMD);
5750
5751 /* 5 - enable cgcg */
5752 data |= RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK;
5753
5754 if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGLS) {
 5755	/* enable cgls */
5756 data |= RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
6e378858 5757
5758 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5759 data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK;
6e378858 5760
5761 if (temp1 != data1)
5762 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5763 } else {
5764 data &= ~RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK;
5765 }
5766
5767 if (temp != data)
5768 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5769 } else {
5770 /* disable cntx_empty_int_enable & GFX Idle interrupt */
5771 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
5772
5773 /* TEST CGCG */
5774 temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE);
5775 data1 |= (RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK |
5776 RLC_CGTT_MGCG_OVERRIDE__CGLS_MASK);
5777 if (temp1 != data1)
5778 WREG32(mmRLC_CGTT_MGCG_OVERRIDE, data1);
5779
5780 /* read gfx register to wake up cgcg */
5781 RREG32(mmCB_CGTT_SCLK_CTRL);
5782 RREG32(mmCB_CGTT_SCLK_CTRL);
5783 RREG32(mmCB_CGTT_SCLK_CTRL);
5784 RREG32(mmCB_CGTT_SCLK_CTRL);
5785
5786 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5787 gfx_v8_0_wait_for_rlc_serdes(adev);
5788
 5789	/* write cmd to Set CGCG Override */
79deaaf4 5790 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGCG_OVERRIDE, SET_BPM_SERDES_CMD);
5791
5792 /* wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */
5793 gfx_v8_0_wait_for_rlc_serdes(adev);
5794
5795 /* write cmd to Clear CGLS */
79deaaf4 5796 gfx_v8_0_send_serdes_cmd(adev, BPM_REG_CGLS_EN, CLE_BPM_SERDES_CMD);
5797
5798 /* disable cgcg, cgls should be disabled too. */
5799 data &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK |
14698b6c 5800 RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK);
5801 if (temp != data)
5802 WREG32(mmRLC_CGCG_CGLS_CTRL, data);
5803 }
dbff57bc 5804
5805 gfx_v8_0_wait_for_rlc_serdes(adev);
5806
dbff57bc 5807 adev->gfx.rlc.funcs->exit_safe_mode(adev);
6e378858 5808}
5809static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev,
5810 bool enable)
5811{
5812 if (enable) {
5813 /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS)
5814 * === MGCG + MGLS + TS(CG/LS) ===
5815 */
5816 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5817 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5818 } else {
5819 /* CGCG/CGLS should be disabled before MGCG/MGLS/TS(CG/LS)
5820 * === CGCG + CGLS ===
5821 */
5822 gfx_v8_0_update_coarse_grain_clock_gating(adev, enable);
5823 gfx_v8_0_update_medium_grain_clock_gating(adev, enable);
5824 }
5825 return 0;
5826}
5827
5fc3aeeb 5828static int gfx_v8_0_set_clockgating_state(void *handle,
5829 enum amd_clockgating_state state)
aaa36a97 5830{
5831 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
5832
5833 switch (adev->asic_type) {
5834 case CHIP_FIJI:
5835 case CHIP_CARRIZO:
5836 case CHIP_STONEY:
5837 gfx_v8_0_update_gfx_clock_gating(adev,
5838 state == AMD_CG_STATE_GATE ? true : false);
5839 break;
5840 default:
5841 break;
5842 }
5843 return 0;
5844}
5845
5846static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
5847{
5848 u32 rptr;
5849
5850 rptr = ring->adev->wb.wb[ring->rptr_offs];
5851
5852 return rptr;
5853}
5854
5855static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
5856{
5857 struct amdgpu_device *adev = ring->adev;
5858 u32 wptr;
5859
5860 if (ring->use_doorbell)
5861 /* XXX check if swapping is necessary on BE */
5862 wptr = ring->adev->wb.wb[ring->wptr_offs];
5863 else
5864 wptr = RREG32(mmCP_RB0_WPTR);
5865
5866 return wptr;
5867}
5868
5869static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
5870{
5871 struct amdgpu_device *adev = ring->adev;
5872
5873 if (ring->use_doorbell) {
5874 /* XXX check if swapping is necessary on BE */
5875 adev->wb.wb[ring->wptr_offs] = ring->wptr;
5876 WDOORBELL32(ring->doorbell_index, ring->wptr);
5877 } else {
5878 WREG32(mmCP_RB0_WPTR, ring->wptr);
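		/* read the register back so the posted WPTR write is flushed */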
5879 (void)RREG32(mmCP_RB0_WPTR);
5880 }
5881}
5882
d2edb07b 5883static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
aaa36a97
AD
5884{
5885 u32 ref_and_mask, reg_mem_engine;
5886
5887 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
5888 switch (ring->me) {
5889 case 1:
5890 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
5891 break;
5892 case 2:
5893 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
5894 break;
5895 default:
5896 return;
5897 }
5898 reg_mem_engine = 0;
5899 } else {
5900 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
5901 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
5902 }
5903
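	/* WAIT_REG_MEM in write-then-wait mode: write ref_and_mask to
	 * GPU_HDP_FLUSH_REQ, then poll GPU_HDP_FLUSH_DONE until the same
	 * bits read back, completing the HDP flush handshake.
	 */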
5904 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5905 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
5906 WAIT_REG_MEM_FUNCTION(3) | /* == */
5907 reg_mem_engine));
5908 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
5909 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
5910 amdgpu_ring_write(ring, ref_and_mask);
5911 amdgpu_ring_write(ring, ref_and_mask);
5912 amdgpu_ring_write(ring, 0x20); /* poll interval */
5913}
5914
d35db561
CZ
5915static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
5916{
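	/* writing 1 to mmHDP_DEBUG0 via WRITE_DATA invalidates the HDP cache */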
5917 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5918 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5919 WRITE_DATA_DST_SEL(0) |
5920 WR_CONFIRM));
5921 amdgpu_ring_write(ring, mmHDP_DEBUG0);
5922 amdgpu_ring_write(ring, 0);
5923 amdgpu_ring_write(ring, 1);
5924
5925}
5926
93323131 5927static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
d88bf583
CK
5928 struct amdgpu_ib *ib,
5929 unsigned vm_id, bool ctx_switch)
aaa36a97
AD
5930{
5931 u32 header, control = 0;
aaa36a97 5932
aaa36a97 5933 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
f153d286 5934 if (ctx_switch) {
aaa36a97
AD
5935 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
5936 amdgpu_ring_write(ring, 0);
aaa36a97
AD
5937 }
5938
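	/* constant engine IBs use the INDIRECT_BUFFER_CONST opcode; the
	 * control word carries the IB size in dwords plus the VMID
	 * (vm_id << 24).
	 */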
de807f81 5939 if (ib->flags & AMDGPU_IB_FLAG_CE)
aaa36a97
AD
5940 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
5941 else
5942 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
5943
d88bf583 5944 control |= ib->length_dw | (vm_id << 24);
aaa36a97
AD
5945
5946 amdgpu_ring_write(ring, header);
5947 amdgpu_ring_write(ring,
5948#ifdef __BIG_ENDIAN
5949 (2 << 0) |
5950#endif
5951 (ib->gpu_addr & 0xFFFFFFFC));
5952 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5953 amdgpu_ring_write(ring, control);
5954}
5955
93323131 5956static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
d88bf583
CK
5957 struct amdgpu_ib *ib,
5958 unsigned vm_id, bool ctx_switch)
93323131 5959{
33b7ed01 5960 u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vm_id << 24);
93323131 5961
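	/* compute IBs always use the plain INDIRECT_BUFFER packet, marked
	 * valid in the control word; there is no CE or context-switch
	 * handling on the compute rings.
	 */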
33b7ed01 5962 amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
93323131 5963 amdgpu_ring_write(ring,
5964#ifdef __BIG_ENDIAN
5965 (2 << 0) |
5966#endif
5967 (ib->gpu_addr & 0xFFFFFFFC));
5968 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
5969 amdgpu_ring_write(ring, control);
5970}
5971
aaa36a97 5972static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
890ee23f 5973 u64 seq, unsigned flags)
aaa36a97 5974{
890ee23f
CZ
5975 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
5976 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
5977
aaa36a97
AD
5978 /* EVENT_WRITE_EOP - flush caches, send int */
5979 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
5980 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
5981 EOP_TC_ACTION_EN |
f84e63f2 5982 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
5983 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
5984 EVENT_INDEX(5)));
5985 amdgpu_ring_write(ring, addr & 0xfffffffc);
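	/* DATA_SEL selects a 32-bit (1) or 64-bit (2) fence write and
	 * INT_SEL(2) additionally raises the EOP interrupt, both driven by
	 * the fence flags.
	 */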
90bea0ab 5986 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
890ee23f 5987 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
5988 amdgpu_ring_write(ring, lower_32_bits(seq));
5989 amdgpu_ring_write(ring, upper_32_bits(seq));
22c01cc4 5990
aaa36a97
AD
5991}
5992
b8c7b39e 5993static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
aaa36a97
AD
5994{
5995 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5907a0d8 5996 uint32_t seq = ring->fence_drv.sync_seq;
22c01cc4
AA
5997 uint64_t addr = ring->fence_drv.gpu_addr;
5998
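	/* stall the engine until the fence memory reports sync_seq; the gfx
	 * ring waits on the PFP, compute rings wait on the ME.
	 */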
5999 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6000 amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
9cac5373
CZ
6001 WAIT_REG_MEM_FUNCTION(3) | /* equal */
6002 WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
22c01cc4
AA
6003 amdgpu_ring_write(ring, addr & 0xfffffffc);
6004 amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
6005 amdgpu_ring_write(ring, seq);
6006 amdgpu_ring_write(ring, 0xffffffff);
6007 amdgpu_ring_write(ring, 4); /* poll interval */
aaa36a97 6008
5c3422b0 6009 if (usepfp) {
6010 		/* sync CE with ME to prevent CE from fetching the CEIB before the context switch is done */
6011 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6012 amdgpu_ring_write(ring, 0);
6013 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6014 amdgpu_ring_write(ring, 0);
6015 }
b8c7b39e
CK
6016}
6017
6018static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
6019 unsigned vm_id, uint64_t pd_addr)
6020{
6021 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
5c3422b0 6022
aaa36a97
AD
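	/* update the page directory base address for this VMID; contexts
	 * 0-7 and 8-15 live in two separate register banks.
	 */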
6023 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6024 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
20a85ff8
CK
6025 WRITE_DATA_DST_SEL(0)) |
6026 WR_CONFIRM);
aaa36a97
AD
6027 if (vm_id < 8) {
6028 amdgpu_ring_write(ring,
6029 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
6030 } else {
6031 amdgpu_ring_write(ring,
6032 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
6033 }
6034 amdgpu_ring_write(ring, 0);
6035 amdgpu_ring_write(ring, pd_addr >> 12);
6036
aaa36a97
AD
6037 	/* bits 0-15 are the VM contexts 0-15 */
6038 /* invalidate the cache */
6039 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6040 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
6041 WRITE_DATA_DST_SEL(0)));
6042 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6043 amdgpu_ring_write(ring, 0);
6044 amdgpu_ring_write(ring, 1 << vm_id);
6045
6046 /* wait for the invalidate to complete */
6047 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6048 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6049 WAIT_REG_MEM_FUNCTION(0) | /* always */
6050 WAIT_REG_MEM_ENGINE(0))); /* me */
6051 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
6052 amdgpu_ring_write(ring, 0);
6053 amdgpu_ring_write(ring, 0); /* ref */
6054 amdgpu_ring_write(ring, 0); /* mask */
6055 amdgpu_ring_write(ring, 0x20); /* poll interval */
6056
6057 /* compute doesn't have PFP */
6058 if (usepfp) {
6059 /* sync PFP to ME, otherwise we might get invalid PFP reads */
6060 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6061 amdgpu_ring_write(ring, 0x0);
5c3422b0 6062 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6063 amdgpu_ring_write(ring, 0);
6064 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
6065 amdgpu_ring_write(ring, 0);
aaa36a97
AD
6066 }
6067}
6068
aaa36a97
AD
6069static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
6070{
6071 return ring->adev->wb.wb[ring->rptr_offs];
6072}
6073
6074static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
6075{
6076 return ring->adev->wb.wb[ring->wptr_offs];
6077}
6078
6079static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
6080{
6081 struct amdgpu_device *adev = ring->adev;
6082
6083 /* XXX check if swapping is necessary on BE */
6084 adev->wb.wb[ring->wptr_offs] = ring->wptr;
6085 WDOORBELL32(ring->doorbell_index, ring->wptr);
6086}
6087
6088static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6089 u64 addr, u64 seq,
890ee23f 6090 unsigned flags)
aaa36a97 6091{
890ee23f
CZ
6092 bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
6093 bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
6094
aaa36a97
AD
6095 /* RELEASE_MEM - flush caches, send int */
6096 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
6097 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
6098 EOP_TC_ACTION_EN |
a3d5aaa8 6099 EOP_TC_WB_ACTION_EN |
aaa36a97
AD
6100 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
6101 EVENT_INDEX(5)));
890ee23f 6102 amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
aaa36a97
AD
6103 amdgpu_ring_write(ring, addr & 0xfffffffc);
6104 amdgpu_ring_write(ring, upper_32_bits(addr));
6105 amdgpu_ring_write(ring, lower_32_bits(seq));
6106 amdgpu_ring_write(ring, upper_32_bits(seq));
6107}
6108
6109static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
6110 enum amdgpu_interrupt_state state)
6111{
6112 u32 cp_int_cntl;
6113
6114 switch (state) {
6115 case AMDGPU_IRQ_STATE_DISABLE:
6116 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6117 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6118 TIME_STAMP_INT_ENABLE, 0);
6119 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6120 break;
6121 case AMDGPU_IRQ_STATE_ENABLE:
6122 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6123 cp_int_cntl =
6124 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6125 TIME_STAMP_INT_ENABLE, 1);
6126 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6127 break;
6128 default:
6129 break;
6130 }
6131}
6132
6133static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
6134 int me, int pipe,
6135 enum amdgpu_interrupt_state state)
6136{
6137 u32 mec_int_cntl, mec_int_cntl_reg;
6138
6139 /*
6140 * amdgpu controls only pipe 0 of MEC1. That's why this function only
6141 * handles the setting of interrupts for this specific pipe. All other
6142 * pipes' interrupts are set by amdkfd.
6143 */
6144
6145 if (me == 1) {
6146 switch (pipe) {
6147 case 0:
6148 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
6149 break;
6150 default:
6151 DRM_DEBUG("invalid pipe %d\n", pipe);
6152 return;
6153 }
6154 } else {
6155 DRM_DEBUG("invalid me %d\n", me);
6156 return;
6157 }
6158
6159 switch (state) {
6160 case AMDGPU_IRQ_STATE_DISABLE:
6161 mec_int_cntl = RREG32(mec_int_cntl_reg);
6162 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6163 TIME_STAMP_INT_ENABLE, 0);
6164 WREG32(mec_int_cntl_reg, mec_int_cntl);
6165 break;
6166 case AMDGPU_IRQ_STATE_ENABLE:
6167 mec_int_cntl = RREG32(mec_int_cntl_reg);
6168 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
6169 TIME_STAMP_INT_ENABLE, 1);
6170 WREG32(mec_int_cntl_reg, mec_int_cntl);
6171 break;
6172 default:
6173 break;
6174 }
6175}
6176
6177static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
6178 struct amdgpu_irq_src *source,
6179 unsigned type,
6180 enum amdgpu_interrupt_state state)
6181{
6182 u32 cp_int_cntl;
6183
6184 switch (state) {
6185 case AMDGPU_IRQ_STATE_DISABLE:
6186 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6187 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6188 PRIV_REG_INT_ENABLE, 0);
6189 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6190 break;
6191 case AMDGPU_IRQ_STATE_ENABLE:
6192 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6193 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
04ab3b76 6194 PRIV_REG_INT_ENABLE, 1);
aaa36a97
AD
6195 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6196 break;
6197 default:
6198 break;
6199 }
6200
6201 return 0;
6202}
6203
6204static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
6205 struct amdgpu_irq_src *source,
6206 unsigned type,
6207 enum amdgpu_interrupt_state state)
6208{
6209 u32 cp_int_cntl;
6210
6211 switch (state) {
6212 case AMDGPU_IRQ_STATE_DISABLE:
6213 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6214 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6215 PRIV_INSTR_INT_ENABLE, 0);
6216 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6217 break;
6218 case AMDGPU_IRQ_STATE_ENABLE:
6219 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
6220 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
6221 PRIV_INSTR_INT_ENABLE, 1);
6222 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
6223 break;
6224 default:
6225 break;
6226 }
6227
6228 return 0;
6229}
6230
6231static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
6232 struct amdgpu_irq_src *src,
6233 unsigned type,
6234 enum amdgpu_interrupt_state state)
6235{
6236 switch (type) {
6237 case AMDGPU_CP_IRQ_GFX_EOP:
6238 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
6239 break;
6240 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
6241 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
6242 break;
6243 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
6244 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
6245 break;
6246 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
6247 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
6248 break;
6249 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
6250 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
6251 break;
6252 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
6253 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
6254 break;
6255 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
6256 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
6257 break;
6258 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
6259 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
6260 break;
6261 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
6262 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
6263 break;
6264 default:
6265 break;
6266 }
6267 return 0;
6268}
6269
6270static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
6271 struct amdgpu_irq_src *source,
6272 struct amdgpu_iv_entry *entry)
6273{
6274 int i;
6275 u8 me_id, pipe_id, queue_id;
6276 struct amdgpu_ring *ring;
6277
6278 DRM_DEBUG("IH: CP EOP\n");
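	/* ring_id layout: queue in bits 6:4, ME in bits 3:2, pipe in bits 1:0 */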
6279 me_id = (entry->ring_id & 0x0c) >> 2;
6280 pipe_id = (entry->ring_id & 0x03) >> 0;
6281 queue_id = (entry->ring_id & 0x70) >> 4;
6282
6283 switch (me_id) {
6284 case 0:
6285 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
6286 break;
6287 case 1:
6288 case 2:
6289 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
6290 ring = &adev->gfx.compute_ring[i];
6291 /* Per-queue interrupt is supported for MEC starting from VI.
6292 * The interrupt can only be enabled/disabled per pipe instead of per queue.
6293 */
6294 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
6295 amdgpu_fence_process(ring);
6296 }
6297 break;
6298 }
6299 return 0;
6300}
6301
6302static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
6303 struct amdgpu_irq_src *source,
6304 struct amdgpu_iv_entry *entry)
6305{
6306 DRM_ERROR("Illegal register access in command stream\n");
6307 schedule_work(&adev->reset_work);
6308 return 0;
6309}
6310
6311static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6312 struct amdgpu_irq_src *source,
6313 struct amdgpu_iv_entry *entry)
6314{
6315 DRM_ERROR("Illegal instruction in command stream\n");
6316 schedule_work(&adev->reset_work);
6317 return 0;
6318}
6319
5fc3aeeb 6320const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
88a907d6 6321 .name = "gfx_v8_0",
aaa36a97 6322 .early_init = gfx_v8_0_early_init,
ccba7691 6323 .late_init = gfx_v8_0_late_init,
aaa36a97
AD
6324 .sw_init = gfx_v8_0_sw_init,
6325 .sw_fini = gfx_v8_0_sw_fini,
6326 .hw_init = gfx_v8_0_hw_init,
6327 .hw_fini = gfx_v8_0_hw_fini,
6328 .suspend = gfx_v8_0_suspend,
6329 .resume = gfx_v8_0_resume,
6330 .is_idle = gfx_v8_0_is_idle,
6331 .wait_for_idle = gfx_v8_0_wait_for_idle,
6332 .soft_reset = gfx_v8_0_soft_reset,
aaa36a97
AD
6333 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
6334 .set_powergating_state = gfx_v8_0_set_powergating_state,
6335};
6336
6337static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
6338 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
6339 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
6340 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
6341 .parse_cs = NULL,
93323131 6342 .emit_ib = gfx_v8_0_ring_emit_ib_gfx,
aaa36a97 6343 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
b8c7b39e 6344 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6345 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6346 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
d2edb07b 6347 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 6348 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
6349 .test_ring = gfx_v8_0_ring_test_ring,
6350 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6351 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6352 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
6353};
6354
6355static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6356 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
6357 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
6358 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
6359 .parse_cs = NULL,
93323131 6360 .emit_ib = gfx_v8_0_ring_emit_ib_compute,
aaa36a97 6361 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
b8c7b39e 6362 .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
aaa36a97
AD
6363 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
6364 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
35074d2d 6365 .emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
d35db561 6366 .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
aaa36a97
AD
6367 .test_ring = gfx_v8_0_ring_test_ring,
6368 .test_ib = gfx_v8_0_ring_test_ib,
edff0e28 6369 .insert_nop = amdgpu_ring_insert_nop,
9e5d5309 6370 .pad_ib = amdgpu_ring_generic_pad_ib,
aaa36a97
AD
6371};
6372
6373static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6374{
6375 int i;
6376
6377 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6378 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6379
6380 for (i = 0; i < adev->gfx.num_compute_rings; i++)
6381 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
6382}
6383
6384static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
6385 .set = gfx_v8_0_set_eop_interrupt_state,
6386 .process = gfx_v8_0_eop_irq,
6387};
6388
6389static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
6390 .set = gfx_v8_0_set_priv_reg_fault_state,
6391 .process = gfx_v8_0_priv_reg_irq,
6392};
6393
6394static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6395 .set = gfx_v8_0_set_priv_inst_fault_state,
6396 .process = gfx_v8_0_priv_inst_irq,
6397};
6398
6399static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6400{
6401 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
6402 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
6403
6404 adev->gfx.priv_reg_irq.num_types = 1;
6405 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
6406
6407 adev->gfx.priv_inst_irq.num_types = 1;
6408 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
6409}
6410
dbff57bc
AD
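/* Select the per-ASIC RLC callbacks (such as the exit_safe_mode hook used
 * by the clockgating code above).
 */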
6411static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
6412{
6413 switch (adev->asic_type) {
6414 case CHIP_TOPAZ:
dbff57bc
AD
6415 adev->gfx.rlc.funcs = &iceland_rlc_funcs;
6416 break;
6ab3886c 6417 case CHIP_STONEY:
dbff57bc
AD
6418 case CHIP_CARRIZO:
6419 adev->gfx.rlc.funcs = &cz_rlc_funcs;
6420 break;
6421 default:
6422 adev->gfx.rlc.funcs = &gfx_v8_0_nop_rlc_funcs;
6423 break;
6424 }
6425}
6426
aaa36a97
AD
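/* Partition GDS memory, GWS and OA between the gfx and compute (CS) clients
 * based on the total GDS size reported by the hardware.
 */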
6427static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
6428{
6429 	/* init ASIC gds info */
6430 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
6431 adev->gds.gws.total_size = 64;
6432 adev->gds.oa.total_size = 16;
6433
6434 if (adev->gds.mem.total_size == 64 * 1024) {
6435 adev->gds.mem.gfx_partition_size = 4096;
6436 adev->gds.mem.cs_partition_size = 4096;
6437
6438 adev->gds.gws.gfx_partition_size = 4;
6439 adev->gds.gws.cs_partition_size = 4;
6440
6441 adev->gds.oa.gfx_partition_size = 4;
6442 adev->gds.oa.cs_partition_size = 1;
6443 } else {
6444 adev->gds.mem.gfx_partition_size = 1024;
6445 adev->gds.mem.cs_partition_size = 1024;
6446
6447 adev->gds.gws.gfx_partition_size = 16;
6448 adev->gds.gws.cs_partition_size = 16;
6449
6450 adev->gds.oa.gfx_partition_size = 4;
6451 adev->gds.oa.cs_partition_size = 4;
6452 }
6453}
6454
9de06de8
NH
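/* Mark the CUs in @bitmap as inactive through GC_USER_SHADER_ARRAY_CONFIG. */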
6455static void gfx_v8_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
6456 u32 bitmap)
6457{
6458 u32 data;
6459
6460 if (!bitmap)
6461 return;
6462
6463 data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
6464 data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6465
6466 WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
6467}
6468
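/* Combine the hardware (CC) and user (GC_USER) inactive CU masks and invert
 * them to get the active CU bitmap for the currently selected SE/SH.
 */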
8f8e00c1 6469static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
aaa36a97 6470{
8f8e00c1 6471 u32 data, mask;
aaa36a97 6472
8f8e00c1
AD
6473 data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
6474 data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
aaa36a97 6475
8f8e00c1
AD
6476 data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
6477 data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
aaa36a97 6478
6157bd7a 6479 mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
aaa36a97 6480
8f8e00c1 6481 return (~data) & mask;
aaa36a97
AD
6482}
6483
7dae69a2 6484static void gfx_v8_0_get_cu_info(struct amdgpu_device *adev)
aaa36a97
AD
6485{
6486 int i, j, k, counter, active_cu_number = 0;
6487 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
7dae69a2 6488 struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
9de06de8 6489 unsigned disable_masks[4 * 2];
aaa36a97 6490
6157bd7a
FC
6491 memset(cu_info, 0, sizeof(*cu_info));
6492
9de06de8
NH
6493 amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
6494
aaa36a97
AD
6495 mutex_lock(&adev->grbm_idx_mutex);
6496 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
6497 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
6498 mask = 1;
6499 ao_bitmap = 0;
6500 counter = 0;
9559ef5b 6501 gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
9de06de8
NH
6502 if (i < 4 && j < 2)
6503 gfx_v8_0_set_user_cu_inactive_bitmap(
6504 adev, disable_masks[i * 2 + j]);
8f8e00c1 6505 bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
aaa36a97
AD
6506 cu_info->bitmap[i][j] = bitmap;
6507
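			/* the first two active CUs in each SH are treated as always-on */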
8f8e00c1 6508 			for (k = 0; k < 16; k++) {
aaa36a97
AD
6509 if (bitmap & mask) {
6510 if (counter < 2)
6511 ao_bitmap |= mask;
6512 					counter++;
6513 }
6514 mask <<= 1;
6515 }
6516 active_cu_number += counter;
6517 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
6518 }
6519 }
9559ef5b 6520 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
8f8e00c1 6521 mutex_unlock(&adev->grbm_idx_mutex);
aaa36a97
AD
6522
6523 cu_info->number = active_cu_number;
6524 cu_info->ao_cu_mask = ao_cu_mask;
aaa36a97 6525}