aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
diff options
context:
space:
mode:
authorAlex Deucher <alexander.deucher@amd.com>2015-04-20 17:31:14 -0400
committerAlex Deucher <alexander.deucher@amd.com>2015-06-03 21:03:17 -0400
commitaaa36a976bbb9b02a54c087ff390c0bad1d18e3e (patch)
tree105be3c06ef33c39e6934801d386847950d4ebf9 /drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
parenta2e73f56fa6282481927ec43aa9362c03c2e2104 (diff)
drm/amdgpu: Add initial VI support
This adds initial support for VI asics. This includes Iceland, Tonga, and Carrizo. Our initial focus has been Carrizo, so there are still gaps in support for Tonga and Iceland, notably power management. Acked-by: Christian König <christian.koenig@amd.com> Acked-by: Jammy Zhou <Jammy.Zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c4286
1 files changed, 4286 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
new file mode 100644
index 000000000000..a8397dd2bce4
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -0,0 +1,4286 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23#include <linux/firmware.h>
24#include "drmP.h"
25#include "amdgpu.h"
26#include "amdgpu_gfx.h"
27#include "vi.h"
28#include "vid.h"
29#include "amdgpu_ucode.h"
30#include "clearstate_vi.h"
31
32#include "gmc/gmc_8_2_d.h"
33#include "gmc/gmc_8_2_sh_mask.h"
34
35#include "oss/oss_3_0_d.h"
36#include "oss/oss_3_0_sh_mask.h"
37
38#include "bif/bif_5_0_d.h"
39#include "bif/bif_5_0_sh_mask.h"
40
41#include "gca/gfx_8_0_d.h"
42#include "gca/gfx_8_0_enum.h"
43#include "gca/gfx_8_0_sh_mask.h"
44#include "gca/gfx_8_0_enum.h"
45
46#include "uvd/uvd_5_0_d.h"
47#include "uvd/uvd_5_0_sh_mask.h"
48
49#include "dce/dce_10_0_d.h"
50#include "dce/dce_10_0_sh_mask.h"
51
/* Ring-count constants for the GFX8 block. */
#define GFX8_NUM_GFX_RINGS		1
#define GFX8_NUM_COMPUTE_RINGS		8

/* Per-ASIC golden GB_ADDR_CONFIG values. */
#define TOPAZ_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define CARRIZO_GB_ADDR_CONFIG_GOLDEN	0x22010001
#define TONGA_GB_ADDR_CONFIG_GOLDEN	0x22011003

/* Shift a field value to its bit position in a GB_TILE_MODE0-style word. */
#define ARRAY_MODE(x)		((x) << GB_TILE_MODE0__ARRAY_MODE__SHIFT)
#define PIPE_CONFIG(x)		((x) << GB_TILE_MODE0__PIPE_CONFIG__SHIFT)
#define TILE_SPLIT(x)		((x) << GB_TILE_MODE0__TILE_SPLIT__SHIFT)
#define MICRO_TILE_MODE_NEW(x)	((x) << GB_TILE_MODE0__MICRO_TILE_MODE_NEW__SHIFT)
#define SAMPLE_SPLIT(x)		((x) << GB_TILE_MODE0__SAMPLE_SPLIT__SHIFT)

/* Shift a field value to its bit position in a GB_MACROTILE_MODE0-style word. */
#define BANK_WIDTH(x)		((x) << GB_MACROTILE_MODE0__BANK_WIDTH__SHIFT)
#define BANK_HEIGHT(x)		((x) << GB_MACROTILE_MODE0__BANK_HEIGHT__SHIFT)
#define MACRO_TILE_ASPECT(x)	((x) << GB_MACROTILE_MODE0__MACRO_TILE_ASPECT__SHIFT)
#define NUM_BANKS(x)		((x) << GB_MACROTILE_MODE0__NUM_BANKS__SHIFT)
68
69MODULE_FIRMWARE("radeon/carrizo_ce.bin");
70MODULE_FIRMWARE("radeon/carrizo_pfp.bin");
71MODULE_FIRMWARE("radeon/carrizo_me.bin");
72MODULE_FIRMWARE("radeon/carrizo_mec.bin");
73MODULE_FIRMWARE("radeon/carrizo_mec2.bin");
74MODULE_FIRMWARE("radeon/carrizo_rlc.bin");
75
76MODULE_FIRMWARE("radeon/tonga_ce.bin");
77MODULE_FIRMWARE("radeon/tonga_pfp.bin");
78MODULE_FIRMWARE("radeon/tonga_me.bin");
79MODULE_FIRMWARE("radeon/tonga_mec.bin");
80MODULE_FIRMWARE("radeon/tonga_mec2.bin");
81MODULE_FIRMWARE("radeon/tonga_rlc.bin");
82
83MODULE_FIRMWARE("radeon/topaz_ce.bin");
84MODULE_FIRMWARE("radeon/topaz_pfp.bin");
85MODULE_FIRMWARE("radeon/topaz_me.bin");
86MODULE_FIRMWARE("radeon/topaz_mec.bin");
87MODULE_FIRMWARE("radeon/topaz_mec2.bin");
88MODULE_FIRMWARE("radeon/topaz_rlc.bin");
89
90static const struct amdgpu_gds_reg_offset amdgpu_gds_reg_offset[] =
91{
92 {mmGDS_VMID0_BASE, mmGDS_VMID0_SIZE, mmGDS_GWS_VMID0, mmGDS_OA_VMID0},
93 {mmGDS_VMID1_BASE, mmGDS_VMID1_SIZE, mmGDS_GWS_VMID1, mmGDS_OA_VMID1},
94 {mmGDS_VMID2_BASE, mmGDS_VMID2_SIZE, mmGDS_GWS_VMID2, mmGDS_OA_VMID2},
95 {mmGDS_VMID3_BASE, mmGDS_VMID3_SIZE, mmGDS_GWS_VMID3, mmGDS_OA_VMID3},
96 {mmGDS_VMID4_BASE, mmGDS_VMID4_SIZE, mmGDS_GWS_VMID4, mmGDS_OA_VMID4},
97 {mmGDS_VMID5_BASE, mmGDS_VMID5_SIZE, mmGDS_GWS_VMID5, mmGDS_OA_VMID5},
98 {mmGDS_VMID6_BASE, mmGDS_VMID6_SIZE, mmGDS_GWS_VMID6, mmGDS_OA_VMID6},
99 {mmGDS_VMID7_BASE, mmGDS_VMID7_SIZE, mmGDS_GWS_VMID7, mmGDS_OA_VMID7},
100 {mmGDS_VMID8_BASE, mmGDS_VMID8_SIZE, mmGDS_GWS_VMID8, mmGDS_OA_VMID8},
101 {mmGDS_VMID9_BASE, mmGDS_VMID9_SIZE, mmGDS_GWS_VMID9, mmGDS_OA_VMID9},
102 {mmGDS_VMID10_BASE, mmGDS_VMID10_SIZE, mmGDS_GWS_VMID10, mmGDS_OA_VMID10},
103 {mmGDS_VMID11_BASE, mmGDS_VMID11_SIZE, mmGDS_GWS_VMID11, mmGDS_OA_VMID11},
104 {mmGDS_VMID12_BASE, mmGDS_VMID12_SIZE, mmGDS_GWS_VMID12, mmGDS_OA_VMID12},
105 {mmGDS_VMID13_BASE, mmGDS_VMID13_SIZE, mmGDS_GWS_VMID13, mmGDS_OA_VMID13},
106 {mmGDS_VMID14_BASE, mmGDS_VMID14_SIZE, mmGDS_GWS_VMID14, mmGDS_OA_VMID14},
107 {mmGDS_VMID15_BASE, mmGDS_VMID15_SIZE, mmGDS_GWS_VMID15, mmGDS_OA_VMID15}
108};
109
110static const u32 golden_settings_tonga_a11[] =
111{
112 mmCB_HW_CONTROL, 0xfffdf3cf, 0x00007208,
113 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
114 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
115 mmGB_GPU_ID, 0x0000000f, 0x00000000,
116 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
117 mmPA_SC_FIFO_DEPTH_CNTL, 0x000003ff, 0x000000fc,
118 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
119 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
120 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
121 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000002fb,
122 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x0000543b,
123 mmTCP_CHAN_STEER_LO, 0xffffffff, 0xa9210876,
124};
125
126static const u32 tonga_golden_common_all[] =
127{
128 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
129 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x16000012,
130 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x0000002A,
131 mmGB_ADDR_CONFIG, 0xffffffff, 0x22011003,
132 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
133 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
134 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
135 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
136};
137
138static const u32 tonga_mgcg_cgcg_init[] =
139{
140 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
141 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
142 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
143 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
144 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
145 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
146 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x40000100,
147 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
148 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
149 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
150 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
151 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
152 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
153 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
154 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
155 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
156 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
157 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
158 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
159 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
160 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
161 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
162 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
163 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
164 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
165 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
166 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
167 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
168 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
169 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
170 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
171 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
172 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
173 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
174 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
175 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
176 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
177 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
178 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
179 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
180 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
181 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
182 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
183 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
184 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
185 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
186 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
187 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
188 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
189 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
190 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
191 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
192 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
193 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
194 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
195 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
196 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
197 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
198 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
199 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
200 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
201 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
202 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
203 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
204 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
205 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
206 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
207 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
208 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
209 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
210 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
211 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
212 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
213 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
214 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
215};
216
217static const u32 golden_settings_iceland_a11[] =
218{
219 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
220 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
221 mmDB_DEBUG3, 0xc0000000, 0xc0000000,
222 mmGB_GPU_ID, 0x0000000f, 0x00000000,
223 mmPA_SC_ENHANCE, 0xffffffff, 0x20000001,
224 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
225 mmPA_SC_RASTER_CONFIG, 0x3f3fffff, 0x00000002,
226 mmPA_SC_RASTER_CONFIG_1, 0x0000003f, 0x00000000,
227 mmTA_CNTL_AUX, 0x000f000f, 0x000b0000,
228 mmTCC_CTRL, 0x00100000, 0xf31fff7f,
229 mmTCP_ADDR_CONFIG, 0x000003ff, 0x000000f1,
230 mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000,
231 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010,
232};
233
234static const u32 iceland_golden_common_all[] =
235{
236 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
237 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
238 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
239 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
240 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
241 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
242 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
243 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
244};
245
246static const u32 iceland_mgcg_cgcg_init[] =
247{
248 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
249 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
250 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
251 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
252 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0xc0000100,
253 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0xc0000100,
254 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0xc0000100,
255 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
256 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
257 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
258 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
259 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
260 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
261 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
262 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
263 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
264 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
265 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
266 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
267 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
268 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
269 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
270 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0xff000100,
271 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
272 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
273 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
274 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
275 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
276 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
277 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
278 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
279 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
280 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
281 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
282 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
283 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
284 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
285 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
286 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
287 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
288 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
289 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
290 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
291 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
292 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
293 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
294 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
295 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
296 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
297 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
298 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
299 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
300 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
301 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x0f840f87,
302 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
303 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
304 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
305 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
306 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
307 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
308 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
309 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
310 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
311 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003c,
312};
313
314static const u32 cz_golden_settings_a11[] =
315{
316 mmCB_HW_CONTROL_3, 0x00000040, 0x00000040,
317 mmDB_DEBUG2, 0xf00fffff, 0x00000400,
318 mmGB_GPU_ID, 0x0000000f, 0x00000000,
319 mmPA_SC_ENHANCE, 0xffffffff, 0x00000001,
320 mmPA_SC_LINE_STIPPLE_STATE, 0x0000ff0f, 0x00000000,
321 mmTA_CNTL_AUX, 0x000f000f, 0x00010000,
322 mmTCP_ADDR_CONFIG, 0x0000000f, 0x000000f3,
323 mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00001302
324};
325
326static const u32 cz_golden_common_all[] =
327{
328 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
329 mmPA_SC_RASTER_CONFIG, 0xffffffff, 0x00000002,
330 mmPA_SC_RASTER_CONFIG_1, 0xffffffff, 0x00000000,
331 mmGB_ADDR_CONFIG, 0xffffffff, 0x22010001,
332 mmSPI_RESOURCE_RESERVE_CU_0, 0xffffffff, 0x00000800,
333 mmSPI_RESOURCE_RESERVE_CU_1, 0xffffffff, 0x00000800,
334 mmSPI_RESOURCE_RESERVE_EN_CU_0, 0xffffffff, 0x00007FBF,
335 mmSPI_RESOURCE_RESERVE_EN_CU_1, 0xffffffff, 0x00007FAF
336};
337
338static const u32 cz_mgcg_cgcg_init[] =
339{
340 mmRLC_CGTT_MGCG_OVERRIDE, 0xffffffff, 0xffffffff,
341 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
342 mmCB_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
343 mmCGTT_BCI_CLK_CTRL, 0xffffffff, 0x00000100,
344 mmCGTT_CP_CLK_CTRL, 0xffffffff, 0x00000100,
345 mmCGTT_CPC_CLK_CTRL, 0xffffffff, 0x00000100,
346 mmCGTT_CPF_CLK_CTRL, 0xffffffff, 0x00000100,
347 mmCGTT_GDS_CLK_CTRL, 0xffffffff, 0x00000100,
348 mmCGTT_IA_CLK_CTRL, 0xffffffff, 0x06000100,
349 mmCGTT_PA_CLK_CTRL, 0xffffffff, 0x00000100,
350 mmCGTT_WD_CLK_CTRL, 0xffffffff, 0x06000100,
351 mmCGTT_PC_CLK_CTRL, 0xffffffff, 0x00000100,
352 mmCGTT_RLC_CLK_CTRL, 0xffffffff, 0x00000100,
353 mmCGTT_SC_CLK_CTRL, 0xffffffff, 0x00000100,
354 mmCGTT_SPI_CLK_CTRL, 0xffffffff, 0x00000100,
355 mmCGTT_SQ_CLK_CTRL, 0xffffffff, 0x00000100,
356 mmCGTT_SQG_CLK_CTRL, 0xffffffff, 0x00000100,
357 mmCGTT_SX_CLK_CTRL0, 0xffffffff, 0x00000100,
358 mmCGTT_SX_CLK_CTRL1, 0xffffffff, 0x00000100,
359 mmCGTT_SX_CLK_CTRL2, 0xffffffff, 0x00000100,
360 mmCGTT_SX_CLK_CTRL3, 0xffffffff, 0x00000100,
361 mmCGTT_SX_CLK_CTRL4, 0xffffffff, 0x00000100,
362 mmCGTT_TCI_CLK_CTRL, 0xffffffff, 0x00000100,
363 mmCGTT_TCP_CLK_CTRL, 0xffffffff, 0x00000100,
364 mmCGTT_VGT_CLK_CTRL, 0xffffffff, 0x06000100,
365 mmDB_CGTT_CLK_CTRL_0, 0xffffffff, 0x00000100,
366 mmTA_CGTT_CTRL, 0xffffffff, 0x00000100,
367 mmTCA_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
368 mmTCC_CGTT_SCLK_CTRL, 0xffffffff, 0x00000100,
369 mmTD_CGTT_CTRL, 0xffffffff, 0x00000100,
370 mmGRBM_GFX_INDEX, 0xffffffff, 0xe0000000,
371 mmCGTS_CU0_SP0_CTRL_REG, 0xffffffff, 0x00010000,
372 mmCGTS_CU0_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
373 mmCGTS_CU0_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
374 mmCGTS_CU0_SP1_CTRL_REG, 0xffffffff, 0x00060005,
375 mmCGTS_CU0_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
376 mmCGTS_CU1_SP0_CTRL_REG, 0xffffffff, 0x00010000,
377 mmCGTS_CU1_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
378 mmCGTS_CU1_TA_CTRL_REG, 0xffffffff, 0x00040007,
379 mmCGTS_CU1_SP1_CTRL_REG, 0xffffffff, 0x00060005,
380 mmCGTS_CU1_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
381 mmCGTS_CU2_SP0_CTRL_REG, 0xffffffff, 0x00010000,
382 mmCGTS_CU2_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
383 mmCGTS_CU2_TA_CTRL_REG, 0xffffffff, 0x00040007,
384 mmCGTS_CU2_SP1_CTRL_REG, 0xffffffff, 0x00060005,
385 mmCGTS_CU2_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
386 mmCGTS_CU3_SP0_CTRL_REG, 0xffffffff, 0x00010000,
387 mmCGTS_CU3_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
388 mmCGTS_CU3_TA_CTRL_REG, 0xffffffff, 0x00040007,
389 mmCGTS_CU3_SP1_CTRL_REG, 0xffffffff, 0x00060005,
390 mmCGTS_CU3_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
391 mmCGTS_CU4_SP0_CTRL_REG, 0xffffffff, 0x00010000,
392 mmCGTS_CU4_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
393 mmCGTS_CU4_TA_SQC_CTRL_REG, 0xffffffff, 0x00040007,
394 mmCGTS_CU4_SP1_CTRL_REG, 0xffffffff, 0x00060005,
395 mmCGTS_CU4_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
396 mmCGTS_CU5_SP0_CTRL_REG, 0xffffffff, 0x00010000,
397 mmCGTS_CU5_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
398 mmCGTS_CU5_TA_CTRL_REG, 0xffffffff, 0x00040007,
399 mmCGTS_CU5_SP1_CTRL_REG, 0xffffffff, 0x00060005,
400 mmCGTS_CU5_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
401 mmCGTS_CU6_SP0_CTRL_REG, 0xffffffff, 0x00010000,
402 mmCGTS_CU6_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
403 mmCGTS_CU6_TA_CTRL_REG, 0xffffffff, 0x00040007,
404 mmCGTS_CU6_SP1_CTRL_REG, 0xffffffff, 0x00060005,
405 mmCGTS_CU6_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
406 mmCGTS_CU7_SP0_CTRL_REG, 0xffffffff, 0x00010000,
407 mmCGTS_CU7_LDS_SQ_CTRL_REG, 0xffffffff, 0x00030002,
408 mmCGTS_CU7_TA_CTRL_REG, 0xffffffff, 0x00040007,
409 mmCGTS_CU7_SP1_CTRL_REG, 0xffffffff, 0x00060005,
410 mmCGTS_CU7_TD_TCP_CTRL_REG, 0xffffffff, 0x00090008,
411 mmCGTS_SM_CTRL_REG, 0xffffffff, 0x96e00200,
412 mmCP_RB_WPTR_POLL_CNTL, 0xffffffff, 0x00900100,
413 mmRLC_CGCG_CGLS_CTRL, 0xffffffff, 0x0020003f,
414 mmCP_MEM_SLP_CNTL, 0x00000001, 0x00000001,
415};
416
/* Forward declarations of setup helpers defined later in this file. */
static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev);
static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev);
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev);
420
421static void gfx_v8_0_init_golden_registers(struct amdgpu_device *adev)
422{
423 switch (adev->asic_type) {
424 case CHIP_TOPAZ:
425 amdgpu_program_register_sequence(adev,
426 iceland_mgcg_cgcg_init,
427 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
428 amdgpu_program_register_sequence(adev,
429 golden_settings_iceland_a11,
430 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
431 amdgpu_program_register_sequence(adev,
432 iceland_golden_common_all,
433 (const u32)ARRAY_SIZE(iceland_golden_common_all));
434 break;
435 case CHIP_TONGA:
436 amdgpu_program_register_sequence(adev,
437 tonga_mgcg_cgcg_init,
438 (const u32)ARRAY_SIZE(tonga_mgcg_cgcg_init));
439 amdgpu_program_register_sequence(adev,
440 golden_settings_tonga_a11,
441 (const u32)ARRAY_SIZE(golden_settings_tonga_a11));
442 amdgpu_program_register_sequence(adev,
443 tonga_golden_common_all,
444 (const u32)ARRAY_SIZE(tonga_golden_common_all));
445 break;
446 case CHIP_CARRIZO:
447 amdgpu_program_register_sequence(adev,
448 cz_mgcg_cgcg_init,
449 (const u32)ARRAY_SIZE(cz_mgcg_cgcg_init));
450 amdgpu_program_register_sequence(adev,
451 cz_golden_settings_a11,
452 (const u32)ARRAY_SIZE(cz_golden_settings_a11));
453 amdgpu_program_register_sequence(adev,
454 cz_golden_common_all,
455 (const u32)ARRAY_SIZE(cz_golden_common_all));
456 break;
457 default:
458 break;
459 }
460}
461
462static void gfx_v8_0_scratch_init(struct amdgpu_device *adev)
463{
464 int i;
465
466 adev->gfx.scratch.num_reg = 7;
467 adev->gfx.scratch.reg_base = mmSCRATCH_REG0;
468 for (i = 0; i < adev->gfx.scratch.num_reg; i++) {
469 adev->gfx.scratch.free[i] = true;
470 adev->gfx.scratch.reg[i] = adev->gfx.scratch.reg_base + i;
471 }
472}
473
474static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
475{
476 struct amdgpu_device *adev = ring->adev;
477 uint32_t scratch;
478 uint32_t tmp = 0;
479 unsigned i;
480 int r;
481
482 r = amdgpu_gfx_scratch_get(adev, &scratch);
483 if (r) {
484 DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r);
485 return r;
486 }
487 WREG32(scratch, 0xCAFEDEAD);
488 r = amdgpu_ring_lock(ring, 3);
489 if (r) {
490 DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
491 ring->idx, r);
492 amdgpu_gfx_scratch_free(adev, scratch);
493 return r;
494 }
495 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
496 amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
497 amdgpu_ring_write(ring, 0xDEADBEEF);
498 amdgpu_ring_unlock_commit(ring);
499
500 for (i = 0; i < adev->usec_timeout; i++) {
501 tmp = RREG32(scratch);
502 if (tmp == 0xDEADBEEF)
503 break;
504 DRM_UDELAY(1);
505 }
506 if (i < adev->usec_timeout) {
507 DRM_INFO("ring test on %d succeeded in %d usecs\n",
508 ring->idx, i);
509 } else {
510 DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
511 ring->idx, scratch, tmp);
512 r = -EINVAL;
513 }
514 amdgpu_gfx_scratch_free(adev, scratch);
515 return r;
516}
517
518static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
519{
520 struct amdgpu_device *adev = ring->adev;
521 struct amdgpu_ib ib;
522 uint32_t scratch;
523 uint32_t tmp = 0;
524 unsigned i;
525 int r;
526
527 r = amdgpu_gfx_scratch_get(adev, &scratch);
528 if (r) {
529 DRM_ERROR("amdgpu: failed to get scratch reg (%d).\n", r);
530 return r;
531 }
532 WREG32(scratch, 0xCAFEDEAD);
533 r = amdgpu_ib_get(ring, NULL, 256, &ib);
534 if (r) {
535 DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
536 amdgpu_gfx_scratch_free(adev, scratch);
537 return r;
538 }
539 ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
540 ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START));
541 ib.ptr[2] = 0xDEADBEEF;
542 ib.length_dw = 3;
543 r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
544 if (r) {
545 amdgpu_gfx_scratch_free(adev, scratch);
546 amdgpu_ib_free(adev, &ib);
547 DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
548 return r;
549 }
550 r = amdgpu_fence_wait(ib.fence, false);
551 if (r) {
552 DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
553 amdgpu_gfx_scratch_free(adev, scratch);
554 amdgpu_ib_free(adev, &ib);
555 return r;
556 }
557 for (i = 0; i < adev->usec_timeout; i++) {
558 tmp = RREG32(scratch);
559 if (tmp == 0xDEADBEEF)
560 break;
561 DRM_UDELAY(1);
562 }
563 if (i < adev->usec_timeout) {
564 DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
565 ib.fence->ring->idx, i);
566 } else {
567 DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n",
568 scratch, tmp);
569 r = -EINVAL;
570 }
571 amdgpu_gfx_scratch_free(adev, scratch);
572 amdgpu_ib_free(adev, &ib);
573 return r;
574}
575
576static int gfx_v8_0_init_microcode(struct amdgpu_device *adev)
577{
578 const char *chip_name;
579 char fw_name[30];
580 int err;
581 struct amdgpu_firmware_info *info = NULL;
582 const struct common_firmware_header *header = NULL;
583
584 DRM_DEBUG("\n");
585
586 switch (adev->asic_type) {
587 case CHIP_TOPAZ:
588 chip_name = "topaz";
589 break;
590 case CHIP_TONGA:
591 chip_name = "tonga";
592 break;
593 case CHIP_CARRIZO:
594 chip_name = "carrizo";
595 break;
596 default:
597 BUG();
598 }
599
600 snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
601 err = request_firmware(&adev->gfx.pfp_fw, fw_name, adev->dev);
602 if (err)
603 goto out;
604 err = amdgpu_ucode_validate(adev->gfx.pfp_fw);
605 if (err)
606 goto out;
607
608 snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
609 err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev);
610 if (err)
611 goto out;
612 err = amdgpu_ucode_validate(adev->gfx.me_fw);
613 if (err)
614 goto out;
615
616 snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
617 err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev);
618 if (err)
619 goto out;
620 err = amdgpu_ucode_validate(adev->gfx.ce_fw);
621 if (err)
622 goto out;
623
624 snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
625 err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
626 if (err)
627 goto out;
628 err = amdgpu_ucode_validate(adev->gfx.rlc_fw);
629
630 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
631 err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev);
632 if (err)
633 goto out;
634 err = amdgpu_ucode_validate(adev->gfx.mec_fw);
635 if (err)
636 goto out;
637
638 snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", chip_name);
639 err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev);
640 if (!err) {
641 err = amdgpu_ucode_validate(adev->gfx.mec2_fw);
642 if (err)
643 goto out;
644 } else {
645 err = 0;
646 adev->gfx.mec2_fw = NULL;
647 }
648
649 if (adev->firmware.smu_load) {
650 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP];
651 info->ucode_id = AMDGPU_UCODE_ID_CP_PFP;
652 info->fw = adev->gfx.pfp_fw;
653 header = (const struct common_firmware_header *)info->fw->data;
654 adev->firmware.fw_size +=
655 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
656
657 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME];
658 info->ucode_id = AMDGPU_UCODE_ID_CP_ME;
659 info->fw = adev->gfx.me_fw;
660 header = (const struct common_firmware_header *)info->fw->data;
661 adev->firmware.fw_size +=
662 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
663
664 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE];
665 info->ucode_id = AMDGPU_UCODE_ID_CP_CE;
666 info->fw = adev->gfx.ce_fw;
667 header = (const struct common_firmware_header *)info->fw->data;
668 adev->firmware.fw_size +=
669 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
670
671 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G];
672 info->ucode_id = AMDGPU_UCODE_ID_RLC_G;
673 info->fw = adev->gfx.rlc_fw;
674 header = (const struct common_firmware_header *)info->fw->data;
675 adev->firmware.fw_size +=
676 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
677
678 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
679 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
680 info->fw = adev->gfx.mec_fw;
681 header = (const struct common_firmware_header *)info->fw->data;
682 adev->firmware.fw_size +=
683 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
684
685 if (adev->gfx.mec2_fw) {
686 info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2];
687 info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2;
688 info->fw = adev->gfx.mec2_fw;
689 header = (const struct common_firmware_header *)info->fw->data;
690 adev->firmware.fw_size +=
691 ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
692 }
693
694 }
695
696out:
697 if (err) {
698 dev_err(adev->dev,
699 "gfx8: Failed to load firmware \"%s\"\n",
700 fw_name);
701 release_firmware(adev->gfx.pfp_fw);
702 adev->gfx.pfp_fw = NULL;
703 release_firmware(adev->gfx.me_fw);
704 adev->gfx.me_fw = NULL;
705 release_firmware(adev->gfx.ce_fw);
706 adev->gfx.ce_fw = NULL;
707 release_firmware(adev->gfx.rlc_fw);
708 adev->gfx.rlc_fw = NULL;
709 release_firmware(adev->gfx.mec_fw);
710 adev->gfx.mec_fw = NULL;
711 release_firmware(adev->gfx.mec2_fw);
712 adev->gfx.mec2_fw = NULL;
713 }
714 return err;
715}
716
717static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
718{
719 int r;
720
721 if (adev->gfx.mec.hpd_eop_obj) {
722 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
723 if (unlikely(r != 0))
724 dev_warn(adev->dev, "(%d) reserve HPD EOP bo failed\n", r);
725 amdgpu_bo_unpin(adev->gfx.mec.hpd_eop_obj);
726 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
727
728 amdgpu_bo_unref(&adev->gfx.mec.hpd_eop_obj);
729 adev->gfx.mec.hpd_eop_obj = NULL;
730 }
731}
732
733#define MEC_HPD_SIZE 2048
734
735static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
736{
737 int r;
738 u32 *hpd;
739
740 /*
741 * we assign only 1 pipe because all other pipes will
742 * be handled by KFD
743 */
744 adev->gfx.mec.num_mec = 1;
745 adev->gfx.mec.num_pipe = 1;
746 adev->gfx.mec.num_queue = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe * 8;
747
748 if (adev->gfx.mec.hpd_eop_obj == NULL) {
749 r = amdgpu_bo_create(adev,
750 adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2,
751 PAGE_SIZE, true,
752 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
753 &adev->gfx.mec.hpd_eop_obj);
754 if (r) {
755 dev_warn(adev->dev, "(%d) create HDP EOP bo failed\n", r);
756 return r;
757 }
758 }
759
760 r = amdgpu_bo_reserve(adev->gfx.mec.hpd_eop_obj, false);
761 if (unlikely(r != 0)) {
762 gfx_v8_0_mec_fini(adev);
763 return r;
764 }
765 r = amdgpu_bo_pin(adev->gfx.mec.hpd_eop_obj, AMDGPU_GEM_DOMAIN_GTT,
766 &adev->gfx.mec.hpd_eop_gpu_addr);
767 if (r) {
768 dev_warn(adev->dev, "(%d) pin HDP EOP bo failed\n", r);
769 gfx_v8_0_mec_fini(adev);
770 return r;
771 }
772 r = amdgpu_bo_kmap(adev->gfx.mec.hpd_eop_obj, (void **)&hpd);
773 if (r) {
774 dev_warn(adev->dev, "(%d) map HDP EOP bo failed\n", r);
775 gfx_v8_0_mec_fini(adev);
776 return r;
777 }
778
779 memset(hpd, 0, adev->gfx.mec.num_mec *adev->gfx.mec.num_pipe * MEC_HPD_SIZE * 2);
780
781 amdgpu_bo_kunmap(adev->gfx.mec.hpd_eop_obj);
782 amdgpu_bo_unreserve(adev->gfx.mec.hpd_eop_obj);
783
784 return 0;
785}
786
787static int gfx_v8_0_sw_init(struct amdgpu_device *adev)
788{
789 int i, r;
790 struct amdgpu_ring *ring;
791
792 /* EOP Event */
793 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
794 if (r)
795 return r;
796
797 /* Privileged reg */
798 r = amdgpu_irq_add_id(adev, 184, &adev->gfx.priv_reg_irq);
799 if (r)
800 return r;
801
802 /* Privileged inst */
803 r = amdgpu_irq_add_id(adev, 185, &adev->gfx.priv_inst_irq);
804 if (r)
805 return r;
806
807 adev->gfx.gfx_current_status = AMDGPU_GFX_NORMAL_MODE;
808
809 gfx_v8_0_scratch_init(adev);
810
811 r = gfx_v8_0_init_microcode(adev);
812 if (r) {
813 DRM_ERROR("Failed to load gfx firmware!\n");
814 return r;
815 }
816
817 r = gfx_v8_0_mec_init(adev);
818 if (r) {
819 DRM_ERROR("Failed to init MEC BOs!\n");
820 return r;
821 }
822
823 r = amdgpu_wb_get(adev, &adev->gfx.ce_sync_offs);
824 if (r) {
825 DRM_ERROR("(%d) gfx.ce_sync_offs wb alloc failed\n", r);
826 return r;
827 }
828
829 /* set up the gfx ring */
830 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
831 ring = &adev->gfx.gfx_ring[i];
832 ring->ring_obj = NULL;
833 sprintf(ring->name, "gfx");
834 /* no gfx doorbells on iceland */
835 if (adev->asic_type != CHIP_TOPAZ) {
836 ring->use_doorbell = true;
837 ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0;
838 }
839
840 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
841 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
842 &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP,
843 AMDGPU_RING_TYPE_GFX);
844 if (r)
845 return r;
846 }
847
848 /* set up the compute queues */
849 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
850 unsigned irq_type;
851
852 /* max 32 queues per MEC */
853 if ((i >= 32) || (i >= AMDGPU_MAX_COMPUTE_RINGS)) {
854 DRM_ERROR("Too many (%d) compute rings!\n", i);
855 break;
856 }
857 ring = &adev->gfx.compute_ring[i];
858 ring->ring_obj = NULL;
859 ring->use_doorbell = true;
860 ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + i;
861 ring->me = 1; /* first MEC */
862 ring->pipe = i / 8;
863 ring->queue = i % 8;
864 sprintf(ring->name, "comp %d.%d.%d", ring->me, ring->pipe, ring->queue);
865 irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP + ring->pipe;
866 /* type-2 packets are deprecated on MEC, use type-3 instead */
867 r = amdgpu_ring_init(adev, ring, 1024 * 1024,
868 PACKET3(PACKET3_NOP, 0x3FFF), 0xf,
869 &adev->gfx.eop_irq, irq_type,
870 AMDGPU_RING_TYPE_COMPUTE);
871 if (r)
872 return r;
873 }
874
875 /* reserve GDS, GWS and OA resource for gfx */
876 r = amdgpu_bo_create(adev, adev->gds.mem.gfx_partition_size,
877 PAGE_SIZE, true,
878 AMDGPU_GEM_DOMAIN_GDS, 0,
879 NULL, &adev->gds.gds_gfx_bo);
880 if (r)
881 return r;
882
883 r = amdgpu_bo_create(adev, adev->gds.gws.gfx_partition_size,
884 PAGE_SIZE, true,
885 AMDGPU_GEM_DOMAIN_GWS, 0,
886 NULL, &adev->gds.gws_gfx_bo);
887 if (r)
888 return r;
889
890 r = amdgpu_bo_create(adev, adev->gds.oa.gfx_partition_size,
891 PAGE_SIZE, true,
892 AMDGPU_GEM_DOMAIN_OA, 0,
893 NULL, &adev->gds.oa_gfx_bo);
894 if (r)
895 return r;
896
897 return 0;
898}
899
900static int gfx_v8_0_sw_fini(struct amdgpu_device *adev)
901{
902 int i;
903
904 amdgpu_bo_unref(&adev->gds.oa_gfx_bo);
905 amdgpu_bo_unref(&adev->gds.gws_gfx_bo);
906 amdgpu_bo_unref(&adev->gds.gds_gfx_bo);
907
908 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
909 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
910 for (i = 0; i < adev->gfx.num_compute_rings; i++)
911 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
912
913 amdgpu_wb_free(adev, adev->gfx.ce_sync_offs);
914
915 gfx_v8_0_mec_fini(adev);
916
917 return 0;
918}
919
920static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
921{
922 const u32 num_tile_mode_states = 32;
923 const u32 num_secondary_tile_mode_states = 16;
924 u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
925
926 switch (adev->gfx.config.mem_row_size_in_kb) {
927 case 1:
928 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
929 break;
930 case 2:
931 default:
932 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
933 break;
934 case 4:
935 split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
936 break;
937 }
938
939 switch (adev->asic_type) {
940 case CHIP_TOPAZ:
941 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
942 switch (reg_offset) {
943 case 0:
944 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
945 PIPE_CONFIG(ADDR_SURF_P2) |
946 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
947 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
948 break;
949 case 1:
950 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
951 PIPE_CONFIG(ADDR_SURF_P2) |
952 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
953 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
954 break;
955 case 2:
956 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
957 PIPE_CONFIG(ADDR_SURF_P2) |
958 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
959 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
960 break;
961 case 3:
962 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
963 PIPE_CONFIG(ADDR_SURF_P2) |
964 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
965 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
966 break;
967 case 4:
968 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
969 PIPE_CONFIG(ADDR_SURF_P2) |
970 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
971 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
972 break;
973 case 5:
974 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
975 PIPE_CONFIG(ADDR_SURF_P2) |
976 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
977 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
978 break;
979 case 6:
980 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
981 PIPE_CONFIG(ADDR_SURF_P2) |
982 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
983 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
984 break;
985 case 8:
986 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
987 PIPE_CONFIG(ADDR_SURF_P2));
988 break;
989 case 9:
990 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
991 PIPE_CONFIG(ADDR_SURF_P2) |
992 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
993 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
994 break;
995 case 10:
996 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
997 PIPE_CONFIG(ADDR_SURF_P2) |
998 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
999 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1000 break;
1001 case 11:
1002 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1003 PIPE_CONFIG(ADDR_SURF_P2) |
1004 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1006 break;
1007 case 13:
1008 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1009 PIPE_CONFIG(ADDR_SURF_P2) |
1010 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1011 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1012 break;
1013 case 14:
1014 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1015 PIPE_CONFIG(ADDR_SURF_P2) |
1016 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1017 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1018 break;
1019 case 15:
1020 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1021 PIPE_CONFIG(ADDR_SURF_P2) |
1022 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1023 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1024 break;
1025 case 16:
1026 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1027 PIPE_CONFIG(ADDR_SURF_P2) |
1028 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1029 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1030 break;
1031 case 18:
1032 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1033 PIPE_CONFIG(ADDR_SURF_P2) |
1034 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1035 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1036 break;
1037 case 19:
1038 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1039 PIPE_CONFIG(ADDR_SURF_P2) |
1040 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1041 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1042 break;
1043 case 20:
1044 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1045 PIPE_CONFIG(ADDR_SURF_P2) |
1046 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1047 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1048 break;
1049 case 21:
1050 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1051 PIPE_CONFIG(ADDR_SURF_P2) |
1052 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1053 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1054 break;
1055 case 22:
1056 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1057 PIPE_CONFIG(ADDR_SURF_P2) |
1058 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1059 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1060 break;
1061 case 24:
1062 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1063 PIPE_CONFIG(ADDR_SURF_P2) |
1064 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1065 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1066 break;
1067 case 25:
1068 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1069 PIPE_CONFIG(ADDR_SURF_P2) |
1070 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1071 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1072 break;
1073 case 26:
1074 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1075 PIPE_CONFIG(ADDR_SURF_P2) |
1076 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1077 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1078 break;
1079 case 27:
1080 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1081 PIPE_CONFIG(ADDR_SURF_P2) |
1082 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1083 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1084 break;
1085 case 28:
1086 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1087 PIPE_CONFIG(ADDR_SURF_P2) |
1088 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1089 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1090 break;
1091 case 29:
1092 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1093 PIPE_CONFIG(ADDR_SURF_P2) |
1094 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1095 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1096 break;
1097 case 7:
1098 case 12:
1099 case 17:
1100 case 23:
1101 /* unused idx */
1102 continue;
1103 default:
1104 gb_tile_moden = 0;
1105 break;
1106 };
1107 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1108 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1109 }
1110 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1111 switch (reg_offset) {
1112 case 0:
1113 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1114 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1115 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1116 NUM_BANKS(ADDR_SURF_8_BANK));
1117 break;
1118 case 1:
1119 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1120 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1121 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1122 NUM_BANKS(ADDR_SURF_8_BANK));
1123 break;
1124 case 2:
1125 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1126 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1127 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1128 NUM_BANKS(ADDR_SURF_8_BANK));
1129 break;
1130 case 3:
1131 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1132 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1133 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1134 NUM_BANKS(ADDR_SURF_8_BANK));
1135 break;
1136 case 4:
1137 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1138 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1139 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1140 NUM_BANKS(ADDR_SURF_8_BANK));
1141 break;
1142 case 5:
1143 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1144 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1145 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1146 NUM_BANKS(ADDR_SURF_8_BANK));
1147 break;
1148 case 6:
1149 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1150 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1151 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1152 NUM_BANKS(ADDR_SURF_8_BANK));
1153 break;
1154 case 8:
1155 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1156 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1157 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1158 NUM_BANKS(ADDR_SURF_16_BANK));
1159 break;
1160 case 9:
1161 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1162 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1163 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1164 NUM_BANKS(ADDR_SURF_16_BANK));
1165 break;
1166 case 10:
1167 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1168 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1169 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1170 NUM_BANKS(ADDR_SURF_16_BANK));
1171 break;
1172 case 11:
1173 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1174 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1175 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1176 NUM_BANKS(ADDR_SURF_16_BANK));
1177 break;
1178 case 12:
1179 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1180 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1181 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1182 NUM_BANKS(ADDR_SURF_16_BANK));
1183 break;
1184 case 13:
1185 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1186 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1187 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1188 NUM_BANKS(ADDR_SURF_16_BANK));
1189 break;
1190 case 14:
1191 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1192 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1193 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1194 NUM_BANKS(ADDR_SURF_8_BANK));
1195 break;
1196 case 7:
1197 /* unused idx */
1198 continue;
1199 default:
1200 gb_tile_moden = 0;
1201 break;
1202 };
1203 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1204 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1205 }
1206 case CHIP_TONGA:
1207 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1208 switch (reg_offset) {
1209 case 0:
1210 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1211 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1212 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1213 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1214 break;
1215 case 1:
1216 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1217 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1218 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1219 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1220 break;
1221 case 2:
1222 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1223 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1224 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1225 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1226 break;
1227 case 3:
1228 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1229 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1230 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1231 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1232 break;
1233 case 4:
1234 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1235 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1236 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1237 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1238 break;
1239 case 5:
1240 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1241 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1242 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1243 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1244 break;
1245 case 6:
1246 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1247 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1248 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1249 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1250 break;
1251 case 7:
1252 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1253 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1254 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1255 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1256 break;
1257 case 8:
1258 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1259 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1260 break;
1261 case 9:
1262 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1263 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1264 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1265 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1266 break;
1267 case 10:
1268 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1269 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1270 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1271 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1272 break;
1273 case 11:
1274 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1275 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1276 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1277 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1278 break;
1279 case 12:
1280 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1281 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1282 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1283 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1284 break;
1285 case 13:
1286 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1287 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1288 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1289 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1290 break;
1291 case 14:
1292 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1293 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1294 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1295 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1296 break;
1297 case 15:
1298 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1299 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1300 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1301 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1302 break;
1303 case 16:
1304 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1305 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1306 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1307 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1308 break;
1309 case 17:
1310 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1311 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1312 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1313 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1314 break;
1315 case 18:
1316 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1317 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1318 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1319 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1320 break;
1321 case 19:
1322 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1323 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1324 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1325 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1326 break;
1327 case 20:
1328 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1329 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1330 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1331 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1332 break;
1333 case 21:
1334 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1335 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1336 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1337 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1338 break;
1339 case 22:
1340 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1341 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1342 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1343 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1344 break;
1345 case 23:
1346 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1347 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1348 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1349 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1350 break;
1351 case 24:
1352 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1353 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1354 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1355 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1356 break;
1357 case 25:
1358 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1359 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1360 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1361 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1362 break;
1363 case 26:
1364 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1365 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1366 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1367 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1368 break;
1369 case 27:
1370 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1371 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1372 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1373 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374 break;
1375 case 28:
1376 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1377 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1378 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1379 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380 break;
1381 case 29:
1382 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1383 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1384 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1385 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1386 break;
1387 case 30:
1388 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1389 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1390 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1391 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1392 break;
1393 default:
1394 gb_tile_moden = 0;
1395 break;
1396 };
1397 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1398 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1399 }
1400 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1401 switch (reg_offset) {
1402 case 0:
1403 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1404 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1405 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1406 NUM_BANKS(ADDR_SURF_16_BANK));
1407 break;
1408 case 1:
1409 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1410 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1411 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1412 NUM_BANKS(ADDR_SURF_16_BANK));
1413 break;
1414 case 2:
1415 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1416 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1417 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1418 NUM_BANKS(ADDR_SURF_16_BANK));
1419 break;
1420 case 3:
1421 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1422 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1423 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1424 NUM_BANKS(ADDR_SURF_16_BANK));
1425 break;
1426 case 4:
1427 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1428 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1429 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1430 NUM_BANKS(ADDR_SURF_16_BANK));
1431 break;
1432 case 5:
1433 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1434 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1435 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1436 NUM_BANKS(ADDR_SURF_16_BANK));
1437 break;
1438 case 6:
1439 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1440 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1441 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1442 NUM_BANKS(ADDR_SURF_16_BANK));
1443 break;
1444 case 8:
1445 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1446 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1447 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1448 NUM_BANKS(ADDR_SURF_16_BANK));
1449 break;
1450 case 9:
1451 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1452 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1453 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1454 NUM_BANKS(ADDR_SURF_16_BANK));
1455 break;
1456 case 10:
1457 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1458 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1459 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1460 NUM_BANKS(ADDR_SURF_16_BANK));
1461 break;
1462 case 11:
1463 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1464 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1465 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1466 NUM_BANKS(ADDR_SURF_16_BANK));
1467 break;
1468 case 12:
1469 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1470 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1471 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1472 NUM_BANKS(ADDR_SURF_8_BANK));
1473 break;
1474 case 13:
1475 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1476 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1477 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1478 NUM_BANKS(ADDR_SURF_4_BANK));
1479 break;
1480 case 14:
1481 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1482 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1483 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1484 NUM_BANKS(ADDR_SURF_4_BANK));
1485 break;
1486 case 7:
1487 /* unused idx */
1488 continue;
1489 default:
1490 gb_tile_moden = 0;
1491 break;
1492 };
1493 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1494 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1495 }
1496 break;
1497 case CHIP_CARRIZO:
1498 default:
1499 for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1500 switch (reg_offset) {
1501 case 0:
1502 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1503 PIPE_CONFIG(ADDR_SURF_P2) |
1504 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
1505 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1506 break;
1507 case 1:
1508 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1509 PIPE_CONFIG(ADDR_SURF_P2) |
1510 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
1511 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1512 break;
1513 case 2:
1514 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1515 PIPE_CONFIG(ADDR_SURF_P2) |
1516 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
1517 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1518 break;
1519 case 3:
1520 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1521 PIPE_CONFIG(ADDR_SURF_P2) |
1522 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
1523 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1524 break;
1525 case 4:
1526 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1527 PIPE_CONFIG(ADDR_SURF_P2) |
1528 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1529 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1530 break;
1531 case 5:
1532 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1533 PIPE_CONFIG(ADDR_SURF_P2) |
1534 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1535 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1536 break;
1537 case 6:
1538 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1539 PIPE_CONFIG(ADDR_SURF_P2) |
1540 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_2KB) |
1541 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1542 break;
1543 case 8:
1544 gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1545 PIPE_CONFIG(ADDR_SURF_P2));
1546 break;
1547 case 9:
1548 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1549 PIPE_CONFIG(ADDR_SURF_P2) |
1550 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1551 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1552 break;
1553 case 10:
1554 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1555 PIPE_CONFIG(ADDR_SURF_P2) |
1556 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1557 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1558 break;
1559 case 11:
1560 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1561 PIPE_CONFIG(ADDR_SURF_P2) |
1562 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1563 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1564 break;
1565 case 13:
1566 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1567 PIPE_CONFIG(ADDR_SURF_P2) |
1568 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1569 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570 break;
1571 case 14:
1572 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1573 PIPE_CONFIG(ADDR_SURF_P2) |
1574 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1575 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576 break;
1577 case 15:
1578 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
1579 PIPE_CONFIG(ADDR_SURF_P2) |
1580 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1581 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1582 break;
1583 case 16:
1584 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1585 PIPE_CONFIG(ADDR_SURF_P2) |
1586 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1587 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1588 break;
1589 case 18:
1590 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1591 PIPE_CONFIG(ADDR_SURF_P2) |
1592 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1593 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1594 break;
1595 case 19:
1596 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
1597 PIPE_CONFIG(ADDR_SURF_P2) |
1598 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1599 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1600 break;
1601 case 20:
1602 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1603 PIPE_CONFIG(ADDR_SURF_P2) |
1604 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1605 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1606 break;
1607 case 21:
1608 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
1609 PIPE_CONFIG(ADDR_SURF_P2) |
1610 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1611 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1612 break;
1613 case 22:
1614 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
1615 PIPE_CONFIG(ADDR_SURF_P2) |
1616 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1617 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1618 break;
1619 case 24:
1620 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
1621 PIPE_CONFIG(ADDR_SURF_P2) |
1622 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1623 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1624 break;
1625 case 25:
1626 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
1627 PIPE_CONFIG(ADDR_SURF_P2) |
1628 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1629 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1630 break;
1631 case 26:
1632 gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
1633 PIPE_CONFIG(ADDR_SURF_P2) |
1634 MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
1635 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
1636 break;
1637 case 27:
1638 gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1639 PIPE_CONFIG(ADDR_SURF_P2) |
1640 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1641 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1642 break;
1643 case 28:
1644 gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1645 PIPE_CONFIG(ADDR_SURF_P2) |
1646 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1647 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1648 break;
1649 case 29:
1650 gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1651 PIPE_CONFIG(ADDR_SURF_P2) |
1652 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1653 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
1654 break;
1655 case 7:
1656 case 12:
1657 case 17:
1658 case 23:
1659 /* unused idx */
1660 continue;
1661 default:
1662 gb_tile_moden = 0;
1663 break;
1664 };
1665 adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
1666 WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
1667 }
1668 for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1669 switch (reg_offset) {
1670 case 0:
1671 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1672 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1673 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1674 NUM_BANKS(ADDR_SURF_8_BANK));
1675 break;
1676 case 1:
1677 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1678 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1679 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1680 NUM_BANKS(ADDR_SURF_8_BANK));
1681 break;
1682 case 2:
1683 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1684 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1685 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1686 NUM_BANKS(ADDR_SURF_8_BANK));
1687 break;
1688 case 3:
1689 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1690 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1691 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1692 NUM_BANKS(ADDR_SURF_8_BANK));
1693 break;
1694 case 4:
1695 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1696 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1697 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1698 NUM_BANKS(ADDR_SURF_8_BANK));
1699 break;
1700 case 5:
1701 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1702 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1703 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1704 NUM_BANKS(ADDR_SURF_8_BANK));
1705 break;
1706 case 6:
1707 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1708 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1709 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1710 NUM_BANKS(ADDR_SURF_8_BANK));
1711 break;
1712 case 8:
1713 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1714 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1715 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1716 NUM_BANKS(ADDR_SURF_16_BANK));
1717 break;
1718 case 9:
1719 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1720 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1721 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1722 NUM_BANKS(ADDR_SURF_16_BANK));
1723 break;
1724 case 10:
1725 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1726 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1727 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1728 NUM_BANKS(ADDR_SURF_16_BANK));
1729 break;
1730 case 11:
1731 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1732 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1733 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1734 NUM_BANKS(ADDR_SURF_16_BANK));
1735 break;
1736 case 12:
1737 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1738 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1739 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1740 NUM_BANKS(ADDR_SURF_16_BANK));
1741 break;
1742 case 13:
1743 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1744 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1745 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1746 NUM_BANKS(ADDR_SURF_16_BANK));
1747 break;
1748 case 14:
1749 gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1750 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1751 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1752 NUM_BANKS(ADDR_SURF_8_BANK));
1753 break;
1754 case 7:
1755 /* unused idx */
1756 continue;
1757 default:
1758 gb_tile_moden = 0;
1759 break;
1760 };
1761 adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
1762 WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
1763 }
1764 }
1765}
1766
/**
 * gfx_v8_0_create_bitmask - build a mask with the low @bit_width bits set
 *
 * @bit_width: number of low-order bits to set
 *
 * Returns a u32 with bits [bit_width - 1:0] set.  A width of 0 yields 0 and
 * any width of 32 or more yields 0xffffffff.  These are the same results the
 * former shift-and-or loop produced, but computed in constant time instead
 * of one iteration per bit.
 */
static u32 gfx_v8_0_create_bitmask(u32 bit_width)
{
	/* Shifting a 32-bit value by 32 or more is undefined, so saturate. */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
1777
1778void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
1779{
1780 u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
1781
1782 if ((se_num == 0xffffffff) && (sh_num == 0xffffffff)) {
1783 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1784 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1785 } else if (se_num == 0xffffffff) {
1786 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1787 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_BROADCAST_WRITES, 1);
1788 } else if (sh_num == 0xffffffff) {
1789 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_BROADCAST_WRITES, 1);
1790 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1791 } else {
1792 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
1793 data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SE_INDEX, se_num);
1794 }
1795 WREG32(mmGRBM_GFX_INDEX, data);
1796}
1797
1798static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
1799 u32 max_rb_num_per_se,
1800 u32 sh_per_se)
1801{
1802 u32 data, mask;
1803
1804 data = RREG32(mmCC_RB_BACKEND_DISABLE);
1805 if (data & 1)
1806 data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
1807 else
1808 data = 0;
1809
1810 data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
1811
1812 data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
1813
1814 mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
1815
1816 return data & mask;
1817}
1818
1819static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
1820 u32 se_num, u32 sh_per_se,
1821 u32 max_rb_num_per_se)
1822{
1823 int i, j;
1824 u32 data, mask;
1825 u32 disabled_rbs = 0;
1826 u32 enabled_rbs = 0;
1827
1828 mutex_lock(&adev->grbm_idx_mutex);
1829 for (i = 0; i < se_num; i++) {
1830 for (j = 0; j < sh_per_se; j++) {
1831 gfx_v8_0_select_se_sh(adev, i, j);
1832 data = gfx_v8_0_get_rb_disabled(adev,
1833 max_rb_num_per_se, sh_per_se);
1834 disabled_rbs |= data << ((i * sh_per_se + j) *
1835 RB_BITMAP_WIDTH_PER_SH);
1836 }
1837 }
1838 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
1839 mutex_unlock(&adev->grbm_idx_mutex);
1840
1841 mask = 1;
1842 for (i = 0; i < max_rb_num_per_se * se_num; i++) {
1843 if (!(disabled_rbs & mask))
1844 enabled_rbs |= mask;
1845 mask <<= 1;
1846 }
1847
1848 adev->gfx.config.backend_enable_mask = enabled_rbs;
1849
1850 mutex_lock(&adev->grbm_idx_mutex);
1851 for (i = 0; i < se_num; i++) {
1852 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
1853 data = 0;
1854 for (j = 0; j < sh_per_se; j++) {
1855 switch (enabled_rbs & 3) {
1856 case 0:
1857 if (j == 0)
1858 data |= (RASTER_CONFIG_RB_MAP_3 <<
1859 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1860 else
1861 data |= (RASTER_CONFIG_RB_MAP_0 <<
1862 PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
1863 break;
1864 case 1:
1865 data |= (RASTER_CONFIG_RB_MAP_0 <<
1866 (i * sh_per_se + j) * 2);
1867 break;
1868 case 2:
1869 data |= (RASTER_CONFIG_RB_MAP_3 <<
1870 (i * sh_per_se + j) * 2);
1871 break;
1872 case 3:
1873 default:
1874 data |= (RASTER_CONFIG_RB_MAP_2 <<
1875 (i * sh_per_se + j) * 2);
1876 break;
1877 }
1878 enabled_rbs >>= 2;
1879 }
1880 WREG32(mmPA_SC_RASTER_CONFIG, data);
1881 }
1882 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
1883 mutex_unlock(&adev->grbm_idx_mutex);
1884}
1885
1886static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
1887{
1888 u32 gb_addr_config;
1889 u32 mc_shared_chmap, mc_arb_ramcfg;
1890 u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
1891 u32 tmp;
1892 int i;
1893
1894 switch (adev->asic_type) {
1895 case CHIP_TOPAZ:
1896 adev->gfx.config.max_shader_engines = 1;
1897 adev->gfx.config.max_tile_pipes = 2;
1898 adev->gfx.config.max_cu_per_sh = 6;
1899 adev->gfx.config.max_sh_per_se = 1;
1900 adev->gfx.config.max_backends_per_se = 2;
1901 adev->gfx.config.max_texture_channel_caches = 2;
1902 adev->gfx.config.max_gprs = 256;
1903 adev->gfx.config.max_gs_threads = 32;
1904 adev->gfx.config.max_hw_contexts = 8;
1905
1906 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1907 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1908 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1909 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1910 gb_addr_config = TOPAZ_GB_ADDR_CONFIG_GOLDEN;
1911 break;
1912 case CHIP_TONGA:
1913 adev->gfx.config.max_shader_engines = 4;
1914 adev->gfx.config.max_tile_pipes = 8;
1915 adev->gfx.config.max_cu_per_sh = 8;
1916 adev->gfx.config.max_sh_per_se = 1;
1917 adev->gfx.config.max_backends_per_se = 2;
1918 adev->gfx.config.max_texture_channel_caches = 8;
1919 adev->gfx.config.max_gprs = 256;
1920 adev->gfx.config.max_gs_threads = 32;
1921 adev->gfx.config.max_hw_contexts = 8;
1922
1923 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1924 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1925 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1926 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1927 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1928 break;
1929 case CHIP_CARRIZO:
1930 adev->gfx.config.max_shader_engines = 1;
1931 adev->gfx.config.max_tile_pipes = 2;
1932 adev->gfx.config.max_cu_per_sh = 8;
1933 adev->gfx.config.max_sh_per_se = 1;
1934 adev->gfx.config.max_backends_per_se = 2;
1935 adev->gfx.config.max_texture_channel_caches = 2;
1936 adev->gfx.config.max_gprs = 256;
1937 adev->gfx.config.max_gs_threads = 32;
1938 adev->gfx.config.max_hw_contexts = 8;
1939
1940 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1941 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1942 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1943 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1944 gb_addr_config = CARRIZO_GB_ADDR_CONFIG_GOLDEN;
1945 break;
1946 default:
1947 adev->gfx.config.max_shader_engines = 2;
1948 adev->gfx.config.max_tile_pipes = 4;
1949 adev->gfx.config.max_cu_per_sh = 2;
1950 adev->gfx.config.max_sh_per_se = 1;
1951 adev->gfx.config.max_backends_per_se = 2;
1952 adev->gfx.config.max_texture_channel_caches = 4;
1953 adev->gfx.config.max_gprs = 256;
1954 adev->gfx.config.max_gs_threads = 32;
1955 adev->gfx.config.max_hw_contexts = 8;
1956
1957 adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
1958 adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
1959 adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
1960 adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
1961 gb_addr_config = TONGA_GB_ADDR_CONFIG_GOLDEN;
1962 break;
1963 }
1964
1965 tmp = RREG32(mmGRBM_CNTL);
1966 tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
1967 WREG32(mmGRBM_CNTL, tmp);
1968
1969 mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
1970 adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
1971 mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
1972
1973 adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
1974 adev->gfx.config.mem_max_burst_length_bytes = 256;
1975 if (adev->flags & AMDGPU_IS_APU) {
1976 /* Get memory bank mapping mode. */
1977 tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
1978 dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1979 dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1980
1981 tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
1982 dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
1983 dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
1984
1985 /* Validate settings in case only one DIMM installed. */
1986 if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
1987 dimm00_addr_map = 0;
1988 if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
1989 dimm01_addr_map = 0;
1990 if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
1991 dimm10_addr_map = 0;
1992 if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
1993 dimm11_addr_map = 0;
1994
1995 /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
1996 /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
1997 if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
1998 adev->gfx.config.mem_row_size_in_kb = 2;
1999 else
2000 adev->gfx.config.mem_row_size_in_kb = 1;
2001 } else {
2002 tmp = REG_GET_FIELD(mc_arb_ramcfg, MC_ARB_RAMCFG, NOOFCOLS);
2003 adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2004 if (adev->gfx.config.mem_row_size_in_kb > 4)
2005 adev->gfx.config.mem_row_size_in_kb = 4;
2006 }
2007
2008 adev->gfx.config.shader_engine_tile_size = 32;
2009 adev->gfx.config.num_gpus = 1;
2010 adev->gfx.config.multi_gpu_tile_size = 64;
2011
2012 /* fix up row size */
2013 switch (adev->gfx.config.mem_row_size_in_kb) {
2014 case 1:
2015 default:
2016 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 0);
2017 break;
2018 case 2:
2019 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 1);
2020 break;
2021 case 4:
2022 gb_addr_config = REG_SET_FIELD(gb_addr_config, GB_ADDR_CONFIG, ROW_SIZE, 2);
2023 break;
2024 }
2025 adev->gfx.config.gb_addr_config = gb_addr_config;
2026
2027 WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
2028 WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
2029 WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
2030 WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
2031 gb_addr_config & 0x70);
2032 WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
2033 gb_addr_config & 0x70);
2034 WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
2035 WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2036 WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2037
2038 gfx_v8_0_tiling_mode_table_init(adev);
2039
2040 gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
2041 adev->gfx.config.max_sh_per_se,
2042 adev->gfx.config.max_backends_per_se);
2043
2044 /* XXX SH_MEM regs */
2045 /* where to put LDS, scratch, GPUVM in FSA64 space */
2046 mutex_lock(&adev->srbm_mutex);
2047 for (i = 0; i < 16; i++) {
2048 vi_srbm_select(adev, 0, 0, 0, i);
2049 /* CP and shaders */
2050 if (i == 0) {
2051 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_UC);
2052 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_UC);
2053 WREG32(mmSH_MEM_CONFIG, tmp);
2054 } else {
2055 tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, DEFAULT_MTYPE, MTYPE_NC);
2056 tmp = REG_SET_FIELD(tmp, SH_MEM_CONFIG, APE1_MTYPE, MTYPE_NC);
2057 WREG32(mmSH_MEM_CONFIG, tmp);
2058 }
2059
2060 WREG32(mmSH_MEM_APE1_BASE, 1);
2061 WREG32(mmSH_MEM_APE1_LIMIT, 0);
2062 WREG32(mmSH_MEM_BASES, 0);
2063 }
2064 vi_srbm_select(adev, 0, 0, 0, 0);
2065 mutex_unlock(&adev->srbm_mutex);
2066
2067 mutex_lock(&adev->grbm_idx_mutex);
2068 /*
2069 * making sure that the following register writes will be broadcasted
2070 * to all the shaders
2071 */
2072 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2073
2074 WREG32(mmPA_SC_FIFO_SIZE,
2075 (adev->gfx.config.sc_prim_fifo_size_frontend <<
2076 PA_SC_FIFO_SIZE__SC_FRONTEND_PRIM_FIFO_SIZE__SHIFT) |
2077 (adev->gfx.config.sc_prim_fifo_size_backend <<
2078 PA_SC_FIFO_SIZE__SC_BACKEND_PRIM_FIFO_SIZE__SHIFT) |
2079 (adev->gfx.config.sc_hiz_tile_fifo_size <<
2080 PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
2081 (adev->gfx.config.sc_earlyz_tile_fifo_size <<
2082 PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
2083 mutex_unlock(&adev->grbm_idx_mutex);
2084
2085}
2086
2087static void gfx_v8_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
2088{
2089 u32 i, j, k;
2090 u32 mask;
2091
2092 mutex_lock(&adev->grbm_idx_mutex);
2093 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
2094 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
2095 gfx_v8_0_select_se_sh(adev, i, j);
2096 for (k = 0; k < adev->usec_timeout; k++) {
2097 if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
2098 break;
2099 udelay(1);
2100 }
2101 }
2102 }
2103 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
2104 mutex_unlock(&adev->grbm_idx_mutex);
2105
2106 mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
2107 RLC_SERDES_NONCU_MASTER_BUSY__GC_MASTER_BUSY_MASK |
2108 RLC_SERDES_NONCU_MASTER_BUSY__TC0_MASTER_BUSY_MASK |
2109 RLC_SERDES_NONCU_MASTER_BUSY__TC1_MASTER_BUSY_MASK;
2110 for (k = 0; k < adev->usec_timeout; k++) {
2111 if ((RREG32(mmRLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
2112 break;
2113 udelay(1);
2114 }
2115}
2116
2117static void gfx_v8_0_enable_gui_idle_interrupt(struct amdgpu_device *adev,
2118 bool enable)
2119{
2120 u32 tmp = RREG32(mmCP_INT_CNTL_RING0);
2121
2122 if (enable) {
2123 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 1);
2124 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 1);
2125 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 1);
2126 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 1);
2127 } else {
2128 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE, 0);
2129 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CNTX_EMPTY_INT_ENABLE, 0);
2130 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, CMP_BUSY_INT_ENABLE, 0);
2131 tmp = REG_SET_FIELD(tmp, CP_INT_CNTL_RING0, GFX_IDLE_INT_ENABLE, 0);
2132 }
2133 WREG32(mmCP_INT_CNTL_RING0, tmp);
2134}
2135
2136void gfx_v8_0_rlc_stop(struct amdgpu_device *adev)
2137{
2138 u32 tmp = RREG32(mmRLC_CNTL);
2139
2140 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 0);
2141 WREG32(mmRLC_CNTL, tmp);
2142
2143 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
2144
2145 gfx_v8_0_wait_for_rlc_serdes(adev);
2146}
2147
2148static void gfx_v8_0_rlc_reset(struct amdgpu_device *adev)
2149{
2150 u32 tmp = RREG32(mmGRBM_SOFT_RESET);
2151
2152 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
2153 WREG32(mmGRBM_SOFT_RESET, tmp);
2154 udelay(50);
2155 tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
2156 WREG32(mmGRBM_SOFT_RESET, tmp);
2157 udelay(50);
2158}
2159
2160static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
2161{
2162 u32 tmp = RREG32(mmRLC_CNTL);
2163
2164 tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
2165 WREG32(mmRLC_CNTL, tmp);
2166
2167 /* carrizo do enable cp interrupt after cp inited */
2168 if (adev->asic_type != CHIP_CARRIZO)
2169 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
2170
2171 udelay(50);
2172}
2173
2174static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev)
2175{
2176 const struct rlc_firmware_header_v2_0 *hdr;
2177 const __le32 *fw_data;
2178 unsigned i, fw_size;
2179
2180 if (!adev->gfx.rlc_fw)
2181 return -EINVAL;
2182
2183 hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data;
2184 amdgpu_ucode_print_rlc_hdr(&hdr->header);
2185 adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version);
2186
2187 fw_data = (const __le32 *)(adev->gfx.rlc_fw->data +
2188 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2189 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
2190
2191 WREG32(mmRLC_GPM_UCODE_ADDR, 0);
2192 for (i = 0; i < fw_size; i++)
2193 WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
2194 WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version);
2195
2196 return 0;
2197}
2198
2199static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
2200{
2201 int r;
2202
2203 gfx_v8_0_rlc_stop(adev);
2204
2205 /* disable CG */
2206 WREG32(mmRLC_CGCG_CGLS_CTRL, 0);
2207
2208 /* disable PG */
2209 WREG32(mmRLC_PG_CNTL, 0);
2210
2211 gfx_v8_0_rlc_reset(adev);
2212
2213 if (!adev->firmware.smu_load) {
2214 /* legacy rlc firmware loading */
2215 r = gfx_v8_0_rlc_load_microcode(adev);
2216 if (r)
2217 return r;
2218 } else {
2219 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
2220 AMDGPU_UCODE_ID_RLC_G);
2221 if (r)
2222 return -EINVAL;
2223 }
2224
2225 gfx_v8_0_rlc_start(adev);
2226
2227 return 0;
2228}
2229
2230static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
2231{
2232 int i;
2233 u32 tmp = RREG32(mmCP_ME_CNTL);
2234
2235 if (enable) {
2236 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
2237 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
2238 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
2239 } else {
2240 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
2241 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
2242 tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
2243 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
2244 adev->gfx.gfx_ring[i].ready = false;
2245 }
2246 WREG32(mmCP_ME_CNTL, tmp);
2247 udelay(50);
2248}
2249
2250static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev)
2251{
2252 const struct gfx_firmware_header_v1_0 *pfp_hdr;
2253 const struct gfx_firmware_header_v1_0 *ce_hdr;
2254 const struct gfx_firmware_header_v1_0 *me_hdr;
2255 const __le32 *fw_data;
2256 unsigned i, fw_size;
2257
2258 if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw)
2259 return -EINVAL;
2260
2261 pfp_hdr = (const struct gfx_firmware_header_v1_0 *)
2262 adev->gfx.pfp_fw->data;
2263 ce_hdr = (const struct gfx_firmware_header_v1_0 *)
2264 adev->gfx.ce_fw->data;
2265 me_hdr = (const struct gfx_firmware_header_v1_0 *)
2266 adev->gfx.me_fw->data;
2267
2268 amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header);
2269 amdgpu_ucode_print_gfx_hdr(&ce_hdr->header);
2270 amdgpu_ucode_print_gfx_hdr(&me_hdr->header);
2271 adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version);
2272 adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version);
2273 adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version);
2274
2275 gfx_v8_0_cp_gfx_enable(adev, false);
2276
2277 /* PFP */
2278 fw_data = (const __le32 *)
2279 (adev->gfx.pfp_fw->data +
2280 le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
2281 fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
2282 WREG32(mmCP_PFP_UCODE_ADDR, 0);
2283 for (i = 0; i < fw_size; i++)
2284 WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
2285 WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version);
2286
2287 /* CE */
2288 fw_data = (const __le32 *)
2289 (adev->gfx.ce_fw->data +
2290 le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
2291 fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
2292 WREG32(mmCP_CE_UCODE_ADDR, 0);
2293 for (i = 0; i < fw_size; i++)
2294 WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
2295 WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version);
2296
2297 /* ME */
2298 fw_data = (const __le32 *)
2299 (adev->gfx.me_fw->data +
2300 le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
2301 fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
2302 WREG32(mmCP_ME_RAM_WADDR, 0);
2303 for (i = 0; i < fw_size; i++)
2304 WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++));
2305 WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version);
2306
2307 return 0;
2308}
2309
2310static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev)
2311{
2312 u32 count = 0;
2313 const struct cs_section_def *sect = NULL;
2314 const struct cs_extent_def *ext = NULL;
2315
2316 /* begin clear state */
2317 count += 2;
2318 /* context control state */
2319 count += 3;
2320
2321 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2322 for (ext = sect->section; ext->extent != NULL; ++ext) {
2323 if (sect->id == SECT_CONTEXT)
2324 count += 2 + ext->reg_count;
2325 else
2326 return 0;
2327 }
2328 }
2329 /* pa_sc_raster_config/pa_sc_raster_config1 */
2330 count += 4;
2331 /* end clear state */
2332 count += 2;
2333 /* clear state */
2334 count += 2;
2335
2336 return count;
2337}
2338
2339static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
2340{
2341 struct amdgpu_ring *ring = &adev->gfx.gfx_ring[0];
2342 const struct cs_section_def *sect = NULL;
2343 const struct cs_extent_def *ext = NULL;
2344 int r, i;
2345
2346 /* init the CP */
2347 WREG32(mmCP_MAX_CONTEXT, adev->gfx.config.max_hw_contexts - 1);
2348 WREG32(mmCP_ENDIAN_SWAP, 0);
2349 WREG32(mmCP_DEVICE_ID, 1);
2350
2351 gfx_v8_0_cp_gfx_enable(adev, true);
2352
2353 r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
2354 if (r) {
2355 DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
2356 return r;
2357 }
2358
2359 /* clear state buffer */
2360 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2361 amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2362
2363 amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2364 amdgpu_ring_write(ring, 0x80000000);
2365 amdgpu_ring_write(ring, 0x80000000);
2366
2367 for (sect = vi_cs_data; sect->section != NULL; ++sect) {
2368 for (ext = sect->section; ext->extent != NULL; ++ext) {
2369 if (sect->id == SECT_CONTEXT) {
2370 amdgpu_ring_write(ring,
2371 PACKET3(PACKET3_SET_CONTEXT_REG,
2372 ext->reg_count));
2373 amdgpu_ring_write(ring,
2374 ext->reg_index - PACKET3_SET_CONTEXT_REG_START);
2375 for (i = 0; i < ext->reg_count; i++)
2376 amdgpu_ring_write(ring, ext->extent[i]);
2377 }
2378 }
2379 }
2380
2381 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2382 amdgpu_ring_write(ring, mmPA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
2383 switch (adev->asic_type) {
2384 case CHIP_TONGA:
2385 amdgpu_ring_write(ring, 0x16000012);
2386 amdgpu_ring_write(ring, 0x0000002A);
2387 break;
2388 case CHIP_TOPAZ:
2389 case CHIP_CARRIZO:
2390 amdgpu_ring_write(ring, 0x00000002);
2391 amdgpu_ring_write(ring, 0x00000000);
2392 break;
2393 default:
2394 BUG();
2395 }
2396
2397 amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2398 amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2399
2400 amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2401 amdgpu_ring_write(ring, 0);
2402
2403 /* init the CE partitions */
2404 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2405 amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2406 amdgpu_ring_write(ring, 0x8000);
2407 amdgpu_ring_write(ring, 0x8000);
2408
2409 amdgpu_ring_unlock_commit(ring);
2410
2411 return 0;
2412}
2413
2414static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
2415{
2416 struct amdgpu_ring *ring;
2417 u32 tmp;
2418 u32 rb_bufsz;
2419 u64 rb_addr, rptr_addr;
2420 int r;
2421
2422 /* Set the write pointer delay */
2423 WREG32(mmCP_RB_WPTR_DELAY, 0);
2424
2425 /* set the RB to use vmid 0 */
2426 WREG32(mmCP_RB_VMID, 0);
2427
2428 /* Set ring buffer size */
2429 ring = &adev->gfx.gfx_ring[0];
2430 rb_bufsz = order_base_2(ring->ring_size / 8);
2431 tmp = REG_SET_FIELD(0, CP_RB0_CNTL, RB_BUFSZ, rb_bufsz);
2432 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, RB_BLKSZ, rb_bufsz - 2);
2433 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MTYPE, 3);
2434 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, MIN_IB_AVAILSZ, 1);
2435#ifdef __BIG_ENDIAN
2436 tmp = REG_SET_FIELD(tmp, CP_RB0_CNTL, BUF_SWAP, 1);
2437#endif
2438 WREG32(mmCP_RB0_CNTL, tmp);
2439
2440 /* Initialize the ring buffer's read and write pointers */
2441 WREG32(mmCP_RB0_CNTL, tmp | CP_RB0_CNTL__RB_RPTR_WR_ENA_MASK);
2442 ring->wptr = 0;
2443 WREG32(mmCP_RB0_WPTR, ring->wptr);
2444
2445 /* set the wb address wether it's enabled or not */
2446 rptr_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
2447 WREG32(mmCP_RB0_RPTR_ADDR, lower_32_bits(rptr_addr));
2448 WREG32(mmCP_RB0_RPTR_ADDR_HI, upper_32_bits(rptr_addr) & 0xFF);
2449
2450 mdelay(1);
2451 WREG32(mmCP_RB0_CNTL, tmp);
2452
2453 rb_addr = ring->gpu_addr >> 8;
2454 WREG32(mmCP_RB0_BASE, rb_addr);
2455 WREG32(mmCP_RB0_BASE_HI, upper_32_bits(rb_addr));
2456
2457 /* no gfx doorbells on iceland */
2458 if (adev->asic_type != CHIP_TOPAZ) {
2459 tmp = RREG32(mmCP_RB_DOORBELL_CONTROL);
2460 if (ring->use_doorbell) {
2461 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2462 DOORBELL_OFFSET, ring->doorbell_index);
2463 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2464 DOORBELL_EN, 1);
2465 } else {
2466 tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL,
2467 DOORBELL_EN, 0);
2468 }
2469 WREG32(mmCP_RB_DOORBELL_CONTROL, tmp);
2470
2471 if (adev->asic_type == CHIP_TONGA) {
2472 tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER,
2473 DOORBELL_RANGE_LOWER,
2474 AMDGPU_DOORBELL_GFX_RING0);
2475 WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp);
2476
2477 WREG32(mmCP_RB_DOORBELL_RANGE_UPPER,
2478 CP_RB_DOORBELL_RANGE_UPPER__DOORBELL_RANGE_UPPER_MASK);
2479 }
2480
2481 }
2482
2483 /* start the ring */
2484 gfx_v8_0_cp_gfx_start(adev);
2485 ring->ready = true;
2486 r = amdgpu_ring_test_ring(ring);
2487 if (r) {
2488 ring->ready = false;
2489 return r;
2490 }
2491
2492 return 0;
2493}
2494
2495static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
2496{
2497 int i;
2498
2499 if (enable) {
2500 WREG32(mmCP_MEC_CNTL, 0);
2501 } else {
2502 WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
2503 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2504 adev->gfx.compute_ring[i].ready = false;
2505 }
2506 udelay(50);
2507}
2508
2509static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
2510{
2511 gfx_v8_0_cp_compute_enable(adev, true);
2512
2513 return 0;
2514}
2515
2516static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
2517{
2518 const struct gfx_firmware_header_v1_0 *mec_hdr;
2519 const __le32 *fw_data;
2520 unsigned i, fw_size;
2521
2522 if (!adev->gfx.mec_fw)
2523 return -EINVAL;
2524
2525 gfx_v8_0_cp_compute_enable(adev, false);
2526
2527 mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data;
2528 amdgpu_ucode_print_gfx_hdr(&mec_hdr->header);
2529 adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version);
2530
2531 fw_data = (const __le32 *)
2532 (adev->gfx.mec_fw->data +
2533 le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
2534 fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
2535
2536 /* MEC1 */
2537 WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0);
2538 for (i = 0; i < fw_size; i++)
2539 WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i));
2540 WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version);
2541
2542 /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */
2543 if (adev->gfx.mec2_fw) {
2544 const struct gfx_firmware_header_v1_0 *mec2_hdr;
2545
2546 mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data;
2547 amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header);
2548 adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version);
2549
2550 fw_data = (const __le32 *)
2551 (adev->gfx.mec2_fw->data +
2552 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
2553 fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
2554
2555 WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0);
2556 for (i = 0; i < fw_size; i++)
2557 WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i));
2558 WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version);
2559 }
2560
2561 return 0;
2562}
2563
2564struct vi_mqd {
2565 uint32_t header; /* ordinal0 */
2566 uint32_t compute_dispatch_initiator; /* ordinal1 */
2567 uint32_t compute_dim_x; /* ordinal2 */
2568 uint32_t compute_dim_y; /* ordinal3 */
2569 uint32_t compute_dim_z; /* ordinal4 */
2570 uint32_t compute_start_x; /* ordinal5 */
2571 uint32_t compute_start_y; /* ordinal6 */
2572 uint32_t compute_start_z; /* ordinal7 */
2573 uint32_t compute_num_thread_x; /* ordinal8 */
2574 uint32_t compute_num_thread_y; /* ordinal9 */
2575 uint32_t compute_num_thread_z; /* ordinal10 */
2576 uint32_t compute_pipelinestat_enable; /* ordinal11 */
2577 uint32_t compute_perfcount_enable; /* ordinal12 */
2578 uint32_t compute_pgm_lo; /* ordinal13 */
2579 uint32_t compute_pgm_hi; /* ordinal14 */
2580 uint32_t compute_tba_lo; /* ordinal15 */
2581 uint32_t compute_tba_hi; /* ordinal16 */
2582 uint32_t compute_tma_lo; /* ordinal17 */
2583 uint32_t compute_tma_hi; /* ordinal18 */
2584 uint32_t compute_pgm_rsrc1; /* ordinal19 */
2585 uint32_t compute_pgm_rsrc2; /* ordinal20 */
2586 uint32_t compute_vmid; /* ordinal21 */
2587 uint32_t compute_resource_limits; /* ordinal22 */
2588 uint32_t compute_static_thread_mgmt_se0; /* ordinal23 */
2589 uint32_t compute_static_thread_mgmt_se1; /* ordinal24 */
2590 uint32_t compute_tmpring_size; /* ordinal25 */
2591 uint32_t compute_static_thread_mgmt_se2; /* ordinal26 */
2592 uint32_t compute_static_thread_mgmt_se3; /* ordinal27 */
2593 uint32_t compute_restart_x; /* ordinal28 */
2594 uint32_t compute_restart_y; /* ordinal29 */
2595 uint32_t compute_restart_z; /* ordinal30 */
2596 uint32_t compute_thread_trace_enable; /* ordinal31 */
2597 uint32_t compute_misc_reserved; /* ordinal32 */
2598 uint32_t compute_dispatch_id; /* ordinal33 */
2599 uint32_t compute_threadgroup_id; /* ordinal34 */
2600 uint32_t compute_relaunch; /* ordinal35 */
2601 uint32_t compute_wave_restore_addr_lo; /* ordinal36 */
2602 uint32_t compute_wave_restore_addr_hi; /* ordinal37 */
2603 uint32_t compute_wave_restore_control; /* ordinal38 */
2604 uint32_t reserved9; /* ordinal39 */
2605 uint32_t reserved10; /* ordinal40 */
2606 uint32_t reserved11; /* ordinal41 */
2607 uint32_t reserved12; /* ordinal42 */
2608 uint32_t reserved13; /* ordinal43 */
2609 uint32_t reserved14; /* ordinal44 */
2610 uint32_t reserved15; /* ordinal45 */
2611 uint32_t reserved16; /* ordinal46 */
2612 uint32_t reserved17; /* ordinal47 */
2613 uint32_t reserved18; /* ordinal48 */
2614 uint32_t reserved19; /* ordinal49 */
2615 uint32_t reserved20; /* ordinal50 */
2616 uint32_t reserved21; /* ordinal51 */
2617 uint32_t reserved22; /* ordinal52 */
2618 uint32_t reserved23; /* ordinal53 */
2619 uint32_t reserved24; /* ordinal54 */
2620 uint32_t reserved25; /* ordinal55 */
2621 uint32_t reserved26; /* ordinal56 */
2622 uint32_t reserved27; /* ordinal57 */
2623 uint32_t reserved28; /* ordinal58 */
2624 uint32_t reserved29; /* ordinal59 */
2625 uint32_t reserved30; /* ordinal60 */
2626 uint32_t reserved31; /* ordinal61 */
2627 uint32_t reserved32; /* ordinal62 */
2628 uint32_t reserved33; /* ordinal63 */
2629 uint32_t reserved34; /* ordinal64 */
2630 uint32_t compute_user_data_0; /* ordinal65 */
2631 uint32_t compute_user_data_1; /* ordinal66 */
2632 uint32_t compute_user_data_2; /* ordinal67 */
2633 uint32_t compute_user_data_3; /* ordinal68 */
2634 uint32_t compute_user_data_4; /* ordinal69 */
2635 uint32_t compute_user_data_5; /* ordinal70 */
2636 uint32_t compute_user_data_6; /* ordinal71 */
2637 uint32_t compute_user_data_7; /* ordinal72 */
2638 uint32_t compute_user_data_8; /* ordinal73 */
2639 uint32_t compute_user_data_9; /* ordinal74 */
2640 uint32_t compute_user_data_10; /* ordinal75 */
2641 uint32_t compute_user_data_11; /* ordinal76 */
2642 uint32_t compute_user_data_12; /* ordinal77 */
2643 uint32_t compute_user_data_13; /* ordinal78 */
2644 uint32_t compute_user_data_14; /* ordinal79 */
2645 uint32_t compute_user_data_15; /* ordinal80 */
2646 uint32_t cp_compute_csinvoc_count_lo; /* ordinal81 */
2647 uint32_t cp_compute_csinvoc_count_hi; /* ordinal82 */
2648 uint32_t reserved35; /* ordinal83 */
2649 uint32_t reserved36; /* ordinal84 */
2650 uint32_t reserved37; /* ordinal85 */
2651 uint32_t cp_mqd_query_time_lo; /* ordinal86 */
2652 uint32_t cp_mqd_query_time_hi; /* ordinal87 */
2653 uint32_t cp_mqd_connect_start_time_lo; /* ordinal88 */
2654 uint32_t cp_mqd_connect_start_time_hi; /* ordinal89 */
2655 uint32_t cp_mqd_connect_end_time_lo; /* ordinal90 */
2656 uint32_t cp_mqd_connect_end_time_hi; /* ordinal91 */
2657 uint32_t cp_mqd_connect_end_wf_count; /* ordinal92 */
2658 uint32_t cp_mqd_connect_end_pq_rptr; /* ordinal93 */
2659 uint32_t cp_mqd_connect_end_pq_wptr; /* ordinal94 */
2660 uint32_t cp_mqd_connect_end_ib_rptr; /* ordinal95 */
2661 uint32_t reserved38; /* ordinal96 */
2662 uint32_t reserved39; /* ordinal97 */
2663 uint32_t cp_mqd_save_start_time_lo; /* ordinal98 */
2664 uint32_t cp_mqd_save_start_time_hi; /* ordinal99 */
2665 uint32_t cp_mqd_save_end_time_lo; /* ordinal100 */
2666 uint32_t cp_mqd_save_end_time_hi; /* ordinal101 */
2667 uint32_t cp_mqd_restore_start_time_lo; /* ordinal102 */
2668 uint32_t cp_mqd_restore_start_time_hi; /* ordinal103 */
2669 uint32_t cp_mqd_restore_end_time_lo; /* ordinal104 */
2670 uint32_t cp_mqd_restore_end_time_hi; /* ordinal105 */
2671 uint32_t reserved40; /* ordinal106 */
2672 uint32_t reserved41; /* ordinal107 */
2673 uint32_t gds_cs_ctxsw_cnt0; /* ordinal108 */
2674 uint32_t gds_cs_ctxsw_cnt1; /* ordinal109 */
2675 uint32_t gds_cs_ctxsw_cnt2; /* ordinal110 */
2676 uint32_t gds_cs_ctxsw_cnt3; /* ordinal111 */
2677 uint32_t reserved42; /* ordinal112 */
2678 uint32_t reserved43; /* ordinal113 */
2679 uint32_t cp_pq_exe_status_lo; /* ordinal114 */
2680 uint32_t cp_pq_exe_status_hi; /* ordinal115 */
2681 uint32_t cp_packet_id_lo; /* ordinal116 */
2682 uint32_t cp_packet_id_hi; /* ordinal117 */
2683 uint32_t cp_packet_exe_status_lo; /* ordinal118 */
2684 uint32_t cp_packet_exe_status_hi; /* ordinal119 */
2685 uint32_t gds_save_base_addr_lo; /* ordinal120 */
2686 uint32_t gds_save_base_addr_hi; /* ordinal121 */
2687 uint32_t gds_save_mask_lo; /* ordinal122 */
2688 uint32_t gds_save_mask_hi; /* ordinal123 */
2689 uint32_t ctx_save_base_addr_lo; /* ordinal124 */
2690 uint32_t ctx_save_base_addr_hi; /* ordinal125 */
2691 uint32_t reserved44; /* ordinal126 */
2692 uint32_t reserved45; /* ordinal127 */
2693 uint32_t cp_mqd_base_addr_lo; /* ordinal128 */
2694 uint32_t cp_mqd_base_addr_hi; /* ordinal129 */
2695 uint32_t cp_hqd_active; /* ordinal130 */
2696 uint32_t cp_hqd_vmid; /* ordinal131 */
2697 uint32_t cp_hqd_persistent_state; /* ordinal132 */
2698 uint32_t cp_hqd_pipe_priority; /* ordinal133 */
2699 uint32_t cp_hqd_queue_priority; /* ordinal134 */
2700 uint32_t cp_hqd_quantum; /* ordinal135 */
2701 uint32_t cp_hqd_pq_base_lo; /* ordinal136 */
2702 uint32_t cp_hqd_pq_base_hi; /* ordinal137 */
2703 uint32_t cp_hqd_pq_rptr; /* ordinal138 */
2704 uint32_t cp_hqd_pq_rptr_report_addr_lo; /* ordinal139 */
2705 uint32_t cp_hqd_pq_rptr_report_addr_hi; /* ordinal140 */
2706 uint32_t cp_hqd_pq_wptr_poll_addr; /* ordinal141 */
2707 uint32_t cp_hqd_pq_wptr_poll_addr_hi; /* ordinal142 */
2708 uint32_t cp_hqd_pq_doorbell_control; /* ordinal143 */
2709 uint32_t cp_hqd_pq_wptr; /* ordinal144 */
2710 uint32_t cp_hqd_pq_control; /* ordinal145 */
2711 uint32_t cp_hqd_ib_base_addr_lo; /* ordinal146 */
2712 uint32_t cp_hqd_ib_base_addr_hi; /* ordinal147 */
2713 uint32_t cp_hqd_ib_rptr; /* ordinal148 */
2714 uint32_t cp_hqd_ib_control; /* ordinal149 */
2715 uint32_t cp_hqd_iq_timer; /* ordinal150 */
2716 uint32_t cp_hqd_iq_rptr; /* ordinal151 */
2717 uint32_t cp_hqd_dequeue_request; /* ordinal152 */
2718 uint32_t cp_hqd_dma_offload; /* ordinal153 */
2719 uint32_t cp_hqd_sema_cmd; /* ordinal154 */
2720 uint32_t cp_hqd_msg_type; /* ordinal155 */
2721 uint32_t cp_hqd_atomic0_preop_lo; /* ordinal156 */
2722 uint32_t cp_hqd_atomic0_preop_hi; /* ordinal157 */
2723 uint32_t cp_hqd_atomic1_preop_lo; /* ordinal158 */
2724 uint32_t cp_hqd_atomic1_preop_hi; /* ordinal159 */
2725 uint32_t cp_hqd_hq_status0; /* ordinal160 */
2726 uint32_t cp_hqd_hq_control0; /* ordinal161 */
2727 uint32_t cp_mqd_control; /* ordinal162 */
2728 uint32_t cp_hqd_hq_status1; /* ordinal163 */
2729 uint32_t cp_hqd_hq_control1; /* ordinal164 */
2730 uint32_t cp_hqd_eop_base_addr_lo; /* ordinal165 */
2731 uint32_t cp_hqd_eop_base_addr_hi; /* ordinal166 */
2732 uint32_t cp_hqd_eop_control; /* ordinal167 */
2733 uint32_t cp_hqd_eop_rptr; /* ordinal168 */
2734 uint32_t cp_hqd_eop_wptr; /* ordinal169 */
2735 uint32_t cp_hqd_eop_done_events; /* ordinal170 */
2736 uint32_t cp_hqd_ctx_save_base_addr_lo; /* ordinal171 */
2737 uint32_t cp_hqd_ctx_save_base_addr_hi; /* ordinal172 */
2738 uint32_t cp_hqd_ctx_save_control; /* ordinal173 */
2739 uint32_t cp_hqd_cntl_stack_offset; /* ordinal174 */
2740 uint32_t cp_hqd_cntl_stack_size; /* ordinal175 */
2741 uint32_t cp_hqd_wg_state_offset; /* ordinal176 */
2742 uint32_t cp_hqd_ctx_save_size; /* ordinal177 */
2743 uint32_t cp_hqd_gds_resource_state; /* ordinal178 */
2744 uint32_t cp_hqd_error; /* ordinal179 */
2745 uint32_t cp_hqd_eop_wptr_mem; /* ordinal180 */
2746 uint32_t cp_hqd_eop_dones; /* ordinal181 */
2747 uint32_t reserved46; /* ordinal182 */
2748 uint32_t reserved47; /* ordinal183 */
2749 uint32_t reserved48; /* ordinal184 */
2750 uint32_t reserved49; /* ordinal185 */
2751 uint32_t reserved50; /* ordinal186 */
2752 uint32_t reserved51; /* ordinal187 */
2753 uint32_t reserved52; /* ordinal188 */
2754 uint32_t reserved53; /* ordinal189 */
2755 uint32_t reserved54; /* ordinal190 */
2756 uint32_t reserved55; /* ordinal191 */
2757 uint32_t iqtimer_pkt_header; /* ordinal192 */
2758 uint32_t iqtimer_pkt_dw0; /* ordinal193 */
2759 uint32_t iqtimer_pkt_dw1; /* ordinal194 */
2760 uint32_t iqtimer_pkt_dw2; /* ordinal195 */
2761 uint32_t iqtimer_pkt_dw3; /* ordinal196 */
2762 uint32_t iqtimer_pkt_dw4; /* ordinal197 */
2763 uint32_t iqtimer_pkt_dw5; /* ordinal198 */
2764 uint32_t iqtimer_pkt_dw6; /* ordinal199 */
2765 uint32_t iqtimer_pkt_dw7; /* ordinal200 */
2766 uint32_t iqtimer_pkt_dw8; /* ordinal201 */
2767 uint32_t iqtimer_pkt_dw9; /* ordinal202 */
2768 uint32_t iqtimer_pkt_dw10; /* ordinal203 */
2769 uint32_t iqtimer_pkt_dw11; /* ordinal204 */
2770 uint32_t iqtimer_pkt_dw12; /* ordinal205 */
2771 uint32_t iqtimer_pkt_dw13; /* ordinal206 */
2772 uint32_t iqtimer_pkt_dw14; /* ordinal207 */
2773 uint32_t iqtimer_pkt_dw15; /* ordinal208 */
2774 uint32_t iqtimer_pkt_dw16; /* ordinal209 */
2775 uint32_t iqtimer_pkt_dw17; /* ordinal210 */
2776 uint32_t iqtimer_pkt_dw18; /* ordinal211 */
2777 uint32_t iqtimer_pkt_dw19; /* ordinal212 */
2778 uint32_t iqtimer_pkt_dw20; /* ordinal213 */
2779 uint32_t iqtimer_pkt_dw21; /* ordinal214 */
2780 uint32_t iqtimer_pkt_dw22; /* ordinal215 */
2781 uint32_t iqtimer_pkt_dw23; /* ordinal216 */
2782 uint32_t iqtimer_pkt_dw24; /* ordinal217 */
2783 uint32_t iqtimer_pkt_dw25; /* ordinal218 */
2784 uint32_t iqtimer_pkt_dw26; /* ordinal219 */
2785 uint32_t iqtimer_pkt_dw27; /* ordinal220 */
2786 uint32_t iqtimer_pkt_dw28; /* ordinal221 */
2787 uint32_t iqtimer_pkt_dw29; /* ordinal222 */
2788 uint32_t iqtimer_pkt_dw30; /* ordinal223 */
2789 uint32_t iqtimer_pkt_dw31; /* ordinal224 */
2790 uint32_t reserved56; /* ordinal225 */
2791 uint32_t reserved57; /* ordinal226 */
2792 uint32_t reserved58; /* ordinal227 */
2793 uint32_t set_resources_header; /* ordinal228 */
2794 uint32_t set_resources_dw1; /* ordinal229 */
2795 uint32_t set_resources_dw2; /* ordinal230 */
2796 uint32_t set_resources_dw3; /* ordinal231 */
2797 uint32_t set_resources_dw4; /* ordinal232 */
2798 uint32_t set_resources_dw5; /* ordinal233 */
2799 uint32_t set_resources_dw6; /* ordinal234 */
2800 uint32_t set_resources_dw7; /* ordinal235 */
2801 uint32_t reserved59; /* ordinal236 */
2802 uint32_t reserved60; /* ordinal237 */
2803 uint32_t reserved61; /* ordinal238 */
2804 uint32_t reserved62; /* ordinal239 */
2805 uint32_t reserved63; /* ordinal240 */
2806 uint32_t reserved64; /* ordinal241 */
2807 uint32_t reserved65; /* ordinal242 */
2808 uint32_t reserved66; /* ordinal243 */
2809 uint32_t reserved67; /* ordinal244 */
2810 uint32_t reserved68; /* ordinal245 */
2811 uint32_t reserved69; /* ordinal246 */
2812 uint32_t reserved70; /* ordinal247 */
2813 uint32_t reserved71; /* ordinal248 */
2814 uint32_t reserved72; /* ordinal249 */
2815 uint32_t reserved73; /* ordinal250 */
2816 uint32_t reserved74; /* ordinal251 */
2817 uint32_t reserved75; /* ordinal252 */
2818 uint32_t reserved76; /* ordinal253 */
2819 uint32_t reserved77; /* ordinal254 */
2820 uint32_t reserved78; /* ordinal255 */
2821
2822 uint32_t reserved_t[256]; /* Reserve 256 dword buffer used by ucode */
2823};
2824
2825static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
2826{
2827 int i, r;
2828
2829 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2830 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2831
2832 if (ring->mqd_obj) {
2833 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2834 if (unlikely(r != 0))
2835 dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
2836
2837 amdgpu_bo_unpin(ring->mqd_obj);
2838 amdgpu_bo_unreserve(ring->mqd_obj);
2839
2840 amdgpu_bo_unref(&ring->mqd_obj);
2841 ring->mqd_obj = NULL;
2842 }
2843 }
2844}
2845
2846static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
2847{
2848 int r, i, j;
2849 u32 tmp;
2850 bool use_doorbell = true;
2851 u64 hqd_gpu_addr;
2852 u64 mqd_gpu_addr;
2853 u64 eop_gpu_addr;
2854 u64 wb_gpu_addr;
2855 u32 *buf;
2856 struct vi_mqd *mqd;
2857
2858 /* init the pipes */
2859 mutex_lock(&adev->srbm_mutex);
2860 for (i = 0; i < (adev->gfx.mec.num_pipe * adev->gfx.mec.num_mec); i++) {
2861 int me = (i < 4) ? 1 : 2;
2862 int pipe = (i < 4) ? i : (i - 4);
2863
2864 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE);
2865 eop_gpu_addr >>= 8;
2866
2867 vi_srbm_select(adev, me, pipe, 0, 0);
2868
2869 /* write the EOP addr */
2870 WREG32(mmCP_HQD_EOP_BASE_ADDR, eop_gpu_addr);
2871 WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
2872
2873 /* set the VMID assigned */
2874 WREG32(mmCP_HQD_VMID, 0);
2875
2876 /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2877 tmp = RREG32(mmCP_HQD_EOP_CONTROL);
2878 tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
2879 (order_base_2(MEC_HPD_SIZE / 4) - 1));
2880 WREG32(mmCP_HQD_EOP_CONTROL, tmp);
2881 }
2882 vi_srbm_select(adev, 0, 0, 0, 0);
2883 mutex_unlock(&adev->srbm_mutex);
2884
2885 /* init the queues. Just two for now. */
2886 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
2887 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
2888
2889 if (ring->mqd_obj == NULL) {
2890 r = amdgpu_bo_create(adev,
2891 sizeof(struct vi_mqd),
2892 PAGE_SIZE, true,
2893 AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
2894 &ring->mqd_obj);
2895 if (r) {
2896 dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
2897 return r;
2898 }
2899 }
2900
2901 r = amdgpu_bo_reserve(ring->mqd_obj, false);
2902 if (unlikely(r != 0)) {
2903 gfx_v8_0_cp_compute_fini(adev);
2904 return r;
2905 }
2906 r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
2907 &mqd_gpu_addr);
2908 if (r) {
2909 dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
2910 gfx_v8_0_cp_compute_fini(adev);
2911 return r;
2912 }
2913 r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
2914 if (r) {
2915 dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
2916 gfx_v8_0_cp_compute_fini(adev);
2917 return r;
2918 }
2919
2920 /* init the mqd struct */
2921 memset(buf, 0, sizeof(struct vi_mqd));
2922
2923 mqd = (struct vi_mqd *)buf;
2924 mqd->header = 0xC0310800;
2925 mqd->compute_pipelinestat_enable = 0x00000001;
2926 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
2927 mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
2928 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
2929 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
2930 mqd->compute_misc_reserved = 0x00000003;
2931
2932 mutex_lock(&adev->srbm_mutex);
2933 vi_srbm_select(adev, ring->me,
2934 ring->pipe,
2935 ring->queue, 0);
2936
2937 /* disable wptr polling */
2938 tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
2939 tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
2940 WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
2941
2942 mqd->cp_hqd_eop_base_addr_lo =
2943 RREG32(mmCP_HQD_EOP_BASE_ADDR);
2944 mqd->cp_hqd_eop_base_addr_hi =
2945 RREG32(mmCP_HQD_EOP_BASE_ADDR_HI);
2946
2947 /* enable doorbell? */
2948 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
2949 if (use_doorbell) {
2950 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
2951 } else {
2952 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
2953 }
2954 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
2955 mqd->cp_hqd_pq_doorbell_control = tmp;
2956
2957 /* disable the queue if it's active */
2958 mqd->cp_hqd_dequeue_request = 0;
2959 mqd->cp_hqd_pq_rptr = 0;
2960 mqd->cp_hqd_pq_wptr= 0;
2961 if (RREG32(mmCP_HQD_ACTIVE) & 1) {
2962 WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
2963 for (j = 0; j < adev->usec_timeout; j++) {
2964 if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
2965 break;
2966 udelay(1);
2967 }
2968 WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
2969 WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
2970 WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
2971 }
2972
2973 /* set the pointer to the MQD */
2974 mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
2975 mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
2976 WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
2977 WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
2978
2979 /* set MQD vmid to 0 */
2980 tmp = RREG32(mmCP_MQD_CONTROL);
2981 tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
2982 WREG32(mmCP_MQD_CONTROL, tmp);
2983 mqd->cp_mqd_control = tmp;
2984
2985 /* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
2986 hqd_gpu_addr = ring->gpu_addr >> 8;
2987 mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
2988 mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
2989 WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
2990 WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
2991
2992 /* set up the HQD, this is similar to CP_RB0_CNTL */
2993 tmp = RREG32(mmCP_HQD_PQ_CONTROL);
2994 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
2995 (order_base_2(ring->ring_size / 4) - 1));
2996 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
2997 ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
2998#ifdef __BIG_ENDIAN
2999 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
3000#endif
3001 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
3002 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
3003 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
3004 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
3005 WREG32(mmCP_HQD_PQ_CONTROL, tmp);
3006 mqd->cp_hqd_pq_control = tmp;
3007
3008 /* set the wb address wether it's enabled or not */
3009 wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
3010 mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
3011 mqd->cp_hqd_pq_rptr_report_addr_hi =
3012 upper_32_bits(wb_gpu_addr) & 0xffff;
3013 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
3014 mqd->cp_hqd_pq_rptr_report_addr_lo);
3015 WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3016 mqd->cp_hqd_pq_rptr_report_addr_hi);
3017
3018 /* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
3019 wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
3020 mqd->cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3021 mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3022 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr);
3023 WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
3024 mqd->cp_hqd_pq_wptr_poll_addr_hi);
3025
3026 /* enable the doorbell if requested */
3027 if (use_doorbell) {
3028 if (adev->asic_type == CHIP_CARRIZO) {
3029 WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
3030 AMDGPU_DOORBELL_KIQ << 2);
3031 WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
3032 AMDGPU_DOORBELL_MEC_RING7 << 2);
3033 }
3034 tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
3035 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
3036 DOORBELL_OFFSET, ring->doorbell_index);
3037 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
3038 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
3039 tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
3040 mqd->cp_hqd_pq_doorbell_control = tmp;
3041
3042 } else {
3043 mqd->cp_hqd_pq_doorbell_control = 0;
3044 }
3045 WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL,
3046 mqd->cp_hqd_pq_doorbell_control);
3047
3048 /* set the vmid for the queue */
3049 mqd->cp_hqd_vmid = 0;
3050 WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);
3051
3052 tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
3053 tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
3054 WREG32(mmCP_HQD_PERSISTENT_STATE, tmp);
3055 mqd->cp_hqd_persistent_state = tmp;
3056
3057 /* activate the queue */
3058 mqd->cp_hqd_active = 1;
3059 WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
3060
3061 vi_srbm_select(adev, 0, 0, 0, 0);
3062 mutex_unlock(&adev->srbm_mutex);
3063
3064 amdgpu_bo_kunmap(ring->mqd_obj);
3065 amdgpu_bo_unreserve(ring->mqd_obj);
3066 }
3067
3068 if (use_doorbell) {
3069 tmp = RREG32(mmCP_PQ_STATUS);
3070 tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
3071 WREG32(mmCP_PQ_STATUS, tmp);
3072 }
3073
3074 r = gfx_v8_0_cp_compute_start(adev);
3075 if (r)
3076 return r;
3077
3078 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
3079 struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
3080
3081 ring->ready = true;
3082 r = amdgpu_ring_test_ring(ring);
3083 if (r)
3084 ring->ready = false;
3085 }
3086
3087 return 0;
3088}
3089
3090static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
3091{
3092 int r;
3093
3094 if (adev->asic_type != CHIP_CARRIZO)
3095 gfx_v8_0_enable_gui_idle_interrupt(adev, false);
3096
3097 if (!adev->firmware.smu_load) {
3098 /* legacy firmware loading */
3099 r = gfx_v8_0_cp_gfx_load_microcode(adev);
3100 if (r)
3101 return r;
3102
3103 r = gfx_v8_0_cp_compute_load_microcode(adev);
3104 if (r)
3105 return r;
3106 } else {
3107 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3108 AMDGPU_UCODE_ID_CP_CE);
3109 if (r)
3110 return -EINVAL;
3111
3112 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3113 AMDGPU_UCODE_ID_CP_PFP);
3114 if (r)
3115 return -EINVAL;
3116
3117 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3118 AMDGPU_UCODE_ID_CP_ME);
3119 if (r)
3120 return -EINVAL;
3121
3122 r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
3123 AMDGPU_UCODE_ID_CP_MEC1);
3124 if (r)
3125 return -EINVAL;
3126 }
3127
3128 r = gfx_v8_0_cp_gfx_resume(adev);
3129 if (r)
3130 return r;
3131
3132 r = gfx_v8_0_cp_compute_resume(adev);
3133 if (r)
3134 return r;
3135
3136 gfx_v8_0_enable_gui_idle_interrupt(adev, true);
3137
3138 return 0;
3139}
3140
3141static void gfx_v8_0_cp_enable(struct amdgpu_device *adev, bool enable)
3142{
3143 gfx_v8_0_cp_gfx_enable(adev, enable);
3144 gfx_v8_0_cp_compute_enable(adev, enable);
3145}
3146
/**
 * gfx_v8_0_hw_init - bring up the GFX hardware block
 *
 * @adev: amdgpu_device pointer
 *
 * Applies the golden register settings, runs the GPU init, then resumes
 * the RLC followed by the CP.
 *
 * Returns 0 on success or the error from the RLC/CP resume step.
 */
static int gfx_v8_0_hw_init(struct amdgpu_device *adev)
{
	int err;

	gfx_v8_0_init_golden_registers(adev);
	gfx_v8_0_gpu_init(adev);

	err = gfx_v8_0_rlc_resume(adev);
	if (err)
		return err;

	/* the CP resume result is the overall result */
	return gfx_v8_0_cp_resume(adev);
}
3165
3166static int gfx_v8_0_hw_fini(struct amdgpu_device *adev)
3167{
3168 gfx_v8_0_cp_enable(adev, false);
3169 gfx_v8_0_rlc_stop(adev);
3170 gfx_v8_0_cp_compute_fini(adev);
3171
3172 return 0;
3173}
3174
/**
 * gfx_v8_0_suspend - suspend the GFX block
 *
 * @adev: amdgpu_device pointer
 *
 * Suspend is implemented as a plain hardware teardown.
 *
 * Returns the result of gfx_v8_0_hw_fini().
 */
static int gfx_v8_0_suspend(struct amdgpu_device *adev)
{
	int ret = gfx_v8_0_hw_fini(adev);

	return ret;
}
3179
/**
 * gfx_v8_0_resume - resume the GFX block
 *
 * @adev: amdgpu_device pointer
 *
 * Resume is implemented as a full hardware init.
 *
 * Returns the result of gfx_v8_0_hw_init().
 */
static int gfx_v8_0_resume(struct amdgpu_device *adev)
{
	int ret = gfx_v8_0_hw_init(adev);

	return ret;
}
3184
3185static bool gfx_v8_0_is_idle(struct amdgpu_device *adev)
3186{
3187 if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE))
3188 return false;
3189 else
3190 return true;
3191}
3192
3193static int gfx_v8_0_wait_for_idle(struct amdgpu_device *adev)
3194{
3195 unsigned i;
3196 u32 tmp;
3197
3198 for (i = 0; i < adev->usec_timeout; i++) {
3199 /* read MC_STATUS */
3200 tmp = RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK;
3201
3202 if (!REG_GET_FIELD(tmp, GRBM_STATUS, GUI_ACTIVE))
3203 return 0;
3204 udelay(1);
3205 }
3206 return -ETIMEDOUT;
3207}
3208
3209static void gfx_v8_0_print_status(struct amdgpu_device *adev)
3210{
3211 int i;
3212
3213 dev_info(adev->dev, "GFX 8.x registers\n");
3214 dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
3215 RREG32(mmGRBM_STATUS));
3216 dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
3217 RREG32(mmGRBM_STATUS2));
3218 dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
3219 RREG32(mmGRBM_STATUS_SE0));
3220 dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
3221 RREG32(mmGRBM_STATUS_SE1));
3222 dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
3223 RREG32(mmGRBM_STATUS_SE2));
3224 dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
3225 RREG32(mmGRBM_STATUS_SE3));
3226 dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
3227 dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
3228 RREG32(mmCP_STALLED_STAT1));
3229 dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
3230 RREG32(mmCP_STALLED_STAT2));
3231 dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
3232 RREG32(mmCP_STALLED_STAT3));
3233 dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
3234 RREG32(mmCP_CPF_BUSY_STAT));
3235 dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
3236 RREG32(mmCP_CPF_STALLED_STAT1));
3237 dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
3238 dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
3239 dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
3240 RREG32(mmCP_CPC_STALLED_STAT1));
3241 dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
3242
3243 for (i = 0; i < 32; i++) {
3244 dev_info(adev->dev, " GB_TILE_MODE%d=0x%08X\n",
3245 i, RREG32(mmGB_TILE_MODE0 + (i * 4)));
3246 }
3247 for (i = 0; i < 16; i++) {
3248 dev_info(adev->dev, " GB_MACROTILE_MODE%d=0x%08X\n",
3249 i, RREG32(mmGB_MACROTILE_MODE0 + (i * 4)));
3250 }
3251 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
3252 dev_info(adev->dev, " se: %d\n", i);
3253 gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
3254 dev_info(adev->dev, " PA_SC_RASTER_CONFIG=0x%08X\n",
3255 RREG32(mmPA_SC_RASTER_CONFIG));
3256 dev_info(adev->dev, " PA_SC_RASTER_CONFIG_1=0x%08X\n",
3257 RREG32(mmPA_SC_RASTER_CONFIG_1));
3258 }
3259 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
3260
3261 dev_info(adev->dev, " GB_ADDR_CONFIG=0x%08X\n",
3262 RREG32(mmGB_ADDR_CONFIG));
3263 dev_info(adev->dev, " HDP_ADDR_CONFIG=0x%08X\n",
3264 RREG32(mmHDP_ADDR_CONFIG));
3265 dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
3266 RREG32(mmDMIF_ADDR_CALC));
3267 dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
3268 RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
3269 dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
3270 RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
3271 dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
3272 RREG32(mmUVD_UDEC_ADDR_CONFIG));
3273 dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
3274 RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
3275 dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
3276 RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
3277
3278 dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
3279 RREG32(mmCP_MEQ_THRESHOLDS));
3280 dev_info(adev->dev, " SX_DEBUG_1=0x%08X\n",
3281 RREG32(mmSX_DEBUG_1));
3282 dev_info(adev->dev, " TA_CNTL_AUX=0x%08X\n",
3283 RREG32(mmTA_CNTL_AUX));
3284 dev_info(adev->dev, " SPI_CONFIG_CNTL=0x%08X\n",
3285 RREG32(mmSPI_CONFIG_CNTL));
3286 dev_info(adev->dev, " SQ_CONFIG=0x%08X\n",
3287 RREG32(mmSQ_CONFIG));
3288 dev_info(adev->dev, " DB_DEBUG=0x%08X\n",
3289 RREG32(mmDB_DEBUG));
3290 dev_info(adev->dev, " DB_DEBUG2=0x%08X\n",
3291 RREG32(mmDB_DEBUG2));
3292 dev_info(adev->dev, " DB_DEBUG3=0x%08X\n",
3293 RREG32(mmDB_DEBUG3));
3294 dev_info(adev->dev, " CB_HW_CONTROL=0x%08X\n",
3295 RREG32(mmCB_HW_CONTROL));
3296 dev_info(adev->dev, " SPI_CONFIG_CNTL_1=0x%08X\n",
3297 RREG32(mmSPI_CONFIG_CNTL_1));
3298 dev_info(adev->dev, " PA_SC_FIFO_SIZE=0x%08X\n",
3299 RREG32(mmPA_SC_FIFO_SIZE));
3300 dev_info(adev->dev, " VGT_NUM_INSTANCES=0x%08X\n",
3301 RREG32(mmVGT_NUM_INSTANCES));
3302 dev_info(adev->dev, " CP_PERFMON_CNTL=0x%08X\n",
3303 RREG32(mmCP_PERFMON_CNTL));
3304 dev_info(adev->dev, " PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
3305 RREG32(mmPA_SC_FORCE_EOV_MAX_CNTS));
3306 dev_info(adev->dev, " VGT_CACHE_INVALIDATION=0x%08X\n",
3307 RREG32(mmVGT_CACHE_INVALIDATION));
3308 dev_info(adev->dev, " VGT_GS_VERTEX_REUSE=0x%08X\n",
3309 RREG32(mmVGT_GS_VERTEX_REUSE));
3310 dev_info(adev->dev, " PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
3311 RREG32(mmPA_SC_LINE_STIPPLE_STATE));
3312 dev_info(adev->dev, " PA_CL_ENHANCE=0x%08X\n",
3313 RREG32(mmPA_CL_ENHANCE));
3314 dev_info(adev->dev, " PA_SC_ENHANCE=0x%08X\n",
3315 RREG32(mmPA_SC_ENHANCE));
3316
3317 dev_info(adev->dev, " CP_ME_CNTL=0x%08X\n",
3318 RREG32(mmCP_ME_CNTL));
3319 dev_info(adev->dev, " CP_MAX_CONTEXT=0x%08X\n",
3320 RREG32(mmCP_MAX_CONTEXT));
3321 dev_info(adev->dev, " CP_ENDIAN_SWAP=0x%08X\n",
3322 RREG32(mmCP_ENDIAN_SWAP));
3323 dev_info(adev->dev, " CP_DEVICE_ID=0x%08X\n",
3324 RREG32(mmCP_DEVICE_ID));
3325
3326 dev_info(adev->dev, " CP_SEM_WAIT_TIMER=0x%08X\n",
3327 RREG32(mmCP_SEM_WAIT_TIMER));
3328
3329 dev_info(adev->dev, " CP_RB_WPTR_DELAY=0x%08X\n",
3330 RREG32(mmCP_RB_WPTR_DELAY));
3331 dev_info(adev->dev, " CP_RB_VMID=0x%08X\n",
3332 RREG32(mmCP_RB_VMID));
3333 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
3334 RREG32(mmCP_RB0_CNTL));
3335 dev_info(adev->dev, " CP_RB0_WPTR=0x%08X\n",
3336 RREG32(mmCP_RB0_WPTR));
3337 dev_info(adev->dev, " CP_RB0_RPTR_ADDR=0x%08X\n",
3338 RREG32(mmCP_RB0_RPTR_ADDR));
3339 dev_info(adev->dev, " CP_RB0_RPTR_ADDR_HI=0x%08X\n",
3340 RREG32(mmCP_RB0_RPTR_ADDR_HI));
3341 dev_info(adev->dev, " CP_RB0_CNTL=0x%08X\n",
3342 RREG32(mmCP_RB0_CNTL));
3343 dev_info(adev->dev, " CP_RB0_BASE=0x%08X\n",
3344 RREG32(mmCP_RB0_BASE));
3345 dev_info(adev->dev, " CP_RB0_BASE_HI=0x%08X\n",
3346 RREG32(mmCP_RB0_BASE_HI));
3347 dev_info(adev->dev, " CP_MEC_CNTL=0x%08X\n",
3348 RREG32(mmCP_MEC_CNTL));
3349 dev_info(adev->dev, " CP_CPF_DEBUG=0x%08X\n",
3350 RREG32(mmCP_CPF_DEBUG));
3351
3352 dev_info(adev->dev, " SCRATCH_ADDR=0x%08X\n",
3353 RREG32(mmSCRATCH_ADDR));
3354 dev_info(adev->dev, " SCRATCH_UMSK=0x%08X\n",
3355 RREG32(mmSCRATCH_UMSK));
3356
3357 dev_info(adev->dev, " CP_INT_CNTL_RING0=0x%08X\n",
3358 RREG32(mmCP_INT_CNTL_RING0));
3359 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
3360 RREG32(mmRLC_LB_CNTL));
3361 dev_info(adev->dev, " RLC_CNTL=0x%08X\n",
3362 RREG32(mmRLC_CNTL));
3363 dev_info(adev->dev, " RLC_CGCG_CGLS_CTRL=0x%08X\n",
3364 RREG32(mmRLC_CGCG_CGLS_CTRL));
3365 dev_info(adev->dev, " RLC_LB_CNTR_INIT=0x%08X\n",
3366 RREG32(mmRLC_LB_CNTR_INIT));
3367 dev_info(adev->dev, " RLC_LB_CNTR_MAX=0x%08X\n",
3368 RREG32(mmRLC_LB_CNTR_MAX));
3369 dev_info(adev->dev, " RLC_LB_INIT_CU_MASK=0x%08X\n",
3370 RREG32(mmRLC_LB_INIT_CU_MASK));
3371 dev_info(adev->dev, " RLC_LB_PARAMS=0x%08X\n",
3372 RREG32(mmRLC_LB_PARAMS));
3373 dev_info(adev->dev, " RLC_LB_CNTL=0x%08X\n",
3374 RREG32(mmRLC_LB_CNTL));
3375 dev_info(adev->dev, " RLC_MC_CNTL=0x%08X\n",
3376 RREG32(mmRLC_MC_CNTL));
3377 dev_info(adev->dev, " RLC_UCODE_CNTL=0x%08X\n",
3378 RREG32(mmRLC_UCODE_CNTL));
3379
3380 mutex_lock(&adev->srbm_mutex);
3381 for (i = 0; i < 16; i++) {
3382 vi_srbm_select(adev, 0, 0, 0, i);
3383 dev_info(adev->dev, " VM %d:\n", i);
3384 dev_info(adev->dev, " SH_MEM_CONFIG=0x%08X\n",
3385 RREG32(mmSH_MEM_CONFIG));
3386 dev_info(adev->dev, " SH_MEM_APE1_BASE=0x%08X\n",
3387 RREG32(mmSH_MEM_APE1_BASE));
3388 dev_info(adev->dev, " SH_MEM_APE1_LIMIT=0x%08X\n",
3389 RREG32(mmSH_MEM_APE1_LIMIT));
3390 dev_info(adev->dev, " SH_MEM_BASES=0x%08X\n",
3391 RREG32(mmSH_MEM_BASES));
3392 }
3393 vi_srbm_select(adev, 0, 0, 0, 0);
3394 mutex_unlock(&adev->srbm_mutex);
3395}
3396
3397static int gfx_v8_0_soft_reset(struct amdgpu_device *adev)
3398{
3399 u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3400 u32 tmp;
3401
3402 /* GRBM_STATUS */
3403 tmp = RREG32(mmGRBM_STATUS);
3404 if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
3405 GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
3406 GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
3407 GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
3408 GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
3409 GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK)) {
3410 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3411 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3412 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3413 GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
3414 }
3415
3416 if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
3417 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3418 GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
3419 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
3420 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
3421 }
3422
3423 /* GRBM_STATUS2 */
3424 tmp = RREG32(mmGRBM_STATUS2);
3425 if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
3426 grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
3427 GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
3428
3429 /* SRBM_STATUS */
3430 tmp = RREG32(mmSRBM_STATUS);
3431 if (REG_GET_FIELD(tmp, SRBM_STATUS, GRBM_RQ_PENDING))
3432 srbm_soft_reset = REG_SET_FIELD(srbm_soft_reset,
3433 SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
3434
3435 if (grbm_soft_reset || srbm_soft_reset) {
3436 gfx_v8_0_print_status(adev);
3437 /* stop the rlc */
3438 gfx_v8_0_rlc_stop(adev);
3439
3440 /* Disable GFX parsing/prefetching */
3441 gfx_v8_0_cp_gfx_enable(adev, false);
3442
3443 /* Disable MEC parsing/prefetching */
3444 /* XXX todo */
3445
3446 if (grbm_soft_reset) {
3447 tmp = RREG32(mmGRBM_SOFT_RESET);
3448 tmp |= grbm_soft_reset;
3449 dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3450 WREG32(mmGRBM_SOFT_RESET, tmp);
3451 tmp = RREG32(mmGRBM_SOFT_RESET);
3452
3453 udelay(50);
3454
3455 tmp &= ~grbm_soft_reset;
3456 WREG32(mmGRBM_SOFT_RESET, tmp);
3457 tmp = RREG32(mmGRBM_SOFT_RESET);
3458 }
3459
3460 if (srbm_soft_reset) {
3461 tmp = RREG32(mmSRBM_SOFT_RESET);
3462 tmp |= srbm_soft_reset;
3463 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3464 WREG32(mmSRBM_SOFT_RESET, tmp);
3465 tmp = RREG32(mmSRBM_SOFT_RESET);
3466
3467 udelay(50);
3468
3469 tmp &= ~srbm_soft_reset;
3470 WREG32(mmSRBM_SOFT_RESET, tmp);
3471 tmp = RREG32(mmSRBM_SOFT_RESET);
3472 }
3473 /* Wait a little for things to settle down */
3474 udelay(50);
3475 gfx_v8_0_print_status(adev);
3476 }
3477 return 0;
3478}
3479
3480/**
3481 * gfx_v8_0_get_gpu_clock_counter - return GPU clock counter snapshot
3482 *
3483 * @adev: amdgpu_device pointer
3484 *
3485 * Fetches a GPU clock counter snapshot.
3486 * Returns the 64 bit clock counter snapshot.
3487 */
3488uint64_t gfx_v8_0_get_gpu_clock_counter(struct amdgpu_device *adev)
3489{
3490 uint64_t clock;
3491
3492 mutex_lock(&adev->gfx.gpu_clock_mutex);
3493 WREG32(mmRLC_CAPTURE_GPU_CLOCK_COUNT, 1);
3494 clock = (uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_LSB) |
3495 ((uint64_t)RREG32(mmRLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
3496 mutex_unlock(&adev->gfx.gpu_clock_mutex);
3497 return clock;
3498}
3499
3500static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
3501 uint32_t vmid,
3502 uint32_t gds_base, uint32_t gds_size,
3503 uint32_t gws_base, uint32_t gws_size,
3504 uint32_t oa_base, uint32_t oa_size)
3505{
3506 gds_base = gds_base >> AMDGPU_GDS_SHIFT;
3507 gds_size = gds_size >> AMDGPU_GDS_SHIFT;
3508
3509 gws_base = gws_base >> AMDGPU_GWS_SHIFT;
3510 gws_size = gws_size >> AMDGPU_GWS_SHIFT;
3511
3512 oa_base = oa_base >> AMDGPU_OA_SHIFT;
3513 oa_size = oa_size >> AMDGPU_OA_SHIFT;
3514
3515 /* GDS Base */
3516 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3517 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3518 WRITE_DATA_DST_SEL(0)));
3519 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_base);
3520 amdgpu_ring_write(ring, 0);
3521 amdgpu_ring_write(ring, gds_base);
3522
3523 /* GDS Size */
3524 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3525 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3526 WRITE_DATA_DST_SEL(0)));
3527 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].mem_size);
3528 amdgpu_ring_write(ring, 0);
3529 amdgpu_ring_write(ring, gds_size);
3530
3531 /* GWS */
3532 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3533 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3534 WRITE_DATA_DST_SEL(0)));
3535 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].gws);
3536 amdgpu_ring_write(ring, 0);
3537 amdgpu_ring_write(ring, gws_size << GDS_GWS_VMID0__SIZE__SHIFT | gws_base);
3538
3539 /* OA */
3540 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3541 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3542 WRITE_DATA_DST_SEL(0)));
3543 amdgpu_ring_write(ring, amdgpu_gds_reg_offset[vmid].oa);
3544 amdgpu_ring_write(ring, 0);
3545 amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base));
3546}
3547
/*
 * Early init hook: records the number of gfx and compute rings, then
 * installs the ring, interrupt and GDS setup for this IP block.
 * Always returns 0.
 */
static int gfx_v8_0_early_init(struct amdgpu_device *adev)
{

	/* ring counts must be set first: set_ring_funcs() iterates over them */
	adev->gfx.num_gfx_rings = GFX8_NUM_GFX_RINGS;
	adev->gfx.num_compute_rings = GFX8_NUM_COMPUTE_RINGS;
	gfx_v8_0_set_ring_funcs(adev);
	gfx_v8_0_set_irq_funcs(adev);
	gfx_v8_0_set_gds_init(adev);

	return 0;
}
3559
/* Powergating hook: currently a stub that ignores @state and returns 0. */
static int gfx_v8_0_set_powergating_state(struct amdgpu_device *adev,
					  enum amdgpu_powergating_state state)
{
	return 0;
}
3565
/* Clockgating hook: currently a stub that ignores @state and returns 0. */
static int gfx_v8_0_set_clockgating_state(struct amdgpu_device *adev,
					  enum amdgpu_clockgating_state state)
{
	return 0;
}
3571
3572static u32 gfx_v8_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
3573{
3574 u32 rptr;
3575
3576 rptr = ring->adev->wb.wb[ring->rptr_offs];
3577
3578 return rptr;
3579}
3580
3581static u32 gfx_v8_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
3582{
3583 struct amdgpu_device *adev = ring->adev;
3584 u32 wptr;
3585
3586 if (ring->use_doorbell)
3587 /* XXX check if swapping is necessary on BE */
3588 wptr = ring->adev->wb.wb[ring->wptr_offs];
3589 else
3590 wptr = RREG32(mmCP_RB0_WPTR);
3591
3592 return wptr;
3593}
3594
3595static void gfx_v8_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
3596{
3597 struct amdgpu_device *adev = ring->adev;
3598
3599 if (ring->use_doorbell) {
3600 /* XXX check if swapping is necessary on BE */
3601 adev->wb.wb[ring->wptr_offs] = ring->wptr;
3602 WDOORBELL32(ring->doorbell_index, ring->wptr);
3603 } else {
3604 WREG32(mmCP_RB0_WPTR, ring->wptr);
3605 (void)RREG32(mmCP_RB0_WPTR);
3606 }
3607}
3608
3609static void gfx_v8_0_hdp_flush_cp_ring_emit(struct amdgpu_ring *ring)
3610{
3611 u32 ref_and_mask, reg_mem_engine;
3612
3613 if (ring->type == AMDGPU_RING_TYPE_COMPUTE) {
3614 switch (ring->me) {
3615 case 1:
3616 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
3617 break;
3618 case 2:
3619 ref_and_mask = GPU_HDP_FLUSH_DONE__CP6_MASK << ring->pipe;
3620 break;
3621 default:
3622 return;
3623 }
3624 reg_mem_engine = 0;
3625 } else {
3626 ref_and_mask = GPU_HDP_FLUSH_DONE__CP0_MASK;
3627 reg_mem_engine = WAIT_REG_MEM_ENGINE(1); /* pfp */
3628 }
3629
3630 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3631 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3632 WAIT_REG_MEM_FUNCTION(3) | /* == */
3633 reg_mem_engine));
3634 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ);
3635 amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE);
3636 amdgpu_ring_write(ring, ref_and_mask);
3637 amdgpu_ring_write(ring, ref_and_mask);
3638 amdgpu_ring_write(ring, 0x20); /* poll interval */
3639}
3640
3641static void gfx_v8_0_ring_emit_ib(struct amdgpu_ring *ring,
3642 struct amdgpu_ib *ib)
3643{
3644 u32 header, control = 0;
3645 u32 next_rptr = ring->wptr + 5;
3646 if (ring->type == AMDGPU_RING_TYPE_COMPUTE)
3647 control |= INDIRECT_BUFFER_VALID;
3648
3649 if (ib->flush_hdp_writefifo)
3650 next_rptr += 7;
3651
3652 if (ring->need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX)
3653 next_rptr += 2;
3654
3655 next_rptr += 4;
3656 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3657 amdgpu_ring_write(ring, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
3658 amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3659 amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3660 amdgpu_ring_write(ring, next_rptr);
3661
3662 if (ib->flush_hdp_writefifo)
3663 gfx_v8_0_hdp_flush_cp_ring_emit(ring);
3664
3665 /* insert SWITCH_BUFFER packet before first IB in the ring frame */
3666 if (ring->need_ctx_switch && ring->type == AMDGPU_RING_TYPE_GFX) {
3667 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3668 amdgpu_ring_write(ring, 0);
3669 ring->need_ctx_switch = false;
3670 }
3671
3672 if (ib->is_const_ib)
3673 header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3674 else
3675 header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3676
3677 control |= ib->length_dw |
3678 (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
3679
3680 amdgpu_ring_write(ring, header);
3681 amdgpu_ring_write(ring,
3682#ifdef __BIG_ENDIAN
3683 (2 << 0) |
3684#endif
3685 (ib->gpu_addr & 0xFFFFFFFC));
3686 amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3687 amdgpu_ring_write(ring, control);
3688}
3689
3690static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
3691 u64 seq, bool write64bit)
3692{
3693 /* EVENT_WRITE_EOP - flush caches, send int */
3694 amdgpu_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3695 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
3696 EOP_TC_ACTION_EN |
3697 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3698 EVENT_INDEX(5)));
3699 amdgpu_ring_write(ring, addr & 0xfffffffc);
3700 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3701 DATA_SEL(write64bit ? 2 : 1) | INT_SEL(2));
3702 amdgpu_ring_write(ring, lower_32_bits(seq));
3703 amdgpu_ring_write(ring, upper_32_bits(seq));
3704}
3705
3706/**
3707 * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
3708 *
3709 * @ring: amdgpu ring buffer object
3710 * @semaphore: amdgpu semaphore object
3711 * @emit_wait: Is this a sempahore wait?
3712 *
3713 * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
3714 * from running ahead of semaphore waits.
3715 */
3716static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
3717 struct amdgpu_semaphore *semaphore,
3718 bool emit_wait)
3719{
3720 uint64_t addr = semaphore->gpu_addr;
3721 unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3722
3723 if (ring->adev->asic_type == CHIP_TOPAZ ||
3724 ring->adev->asic_type == CHIP_TONGA) {
3725 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3726 amdgpu_ring_write(ring, lower_32_bits(addr));
3727 amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3728 } else {
3729 amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
3730 amdgpu_ring_write(ring, lower_32_bits(addr));
3731 amdgpu_ring_write(ring, upper_32_bits(addr));
3732 amdgpu_ring_write(ring, sel);
3733 }
3734
3735 if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
3736 /* Prevent the PFP from running ahead of the semaphore wait */
3737 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3738 amdgpu_ring_write(ring, 0x0);
3739 }
3740
3741 return true;
3742}
3743
3744static void gfx_v8_0_ce_sync_me(struct amdgpu_ring *ring)
3745{
3746 struct amdgpu_device *adev = ring->adev;
3747 u64 gpu_addr = adev->wb.gpu_addr + adev->gfx.ce_sync_offs * 4;
3748
3749 /* instruct DE to set a magic number */
3750 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3751 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3752 WRITE_DATA_DST_SEL(5)));
3753 amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
3754 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
3755 amdgpu_ring_write(ring, 1);
3756
3757 /* let CE wait till condition satisfied */
3758 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3759 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
3760 WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
3761 WAIT_REG_MEM_FUNCTION(3) | /* == */
3762 WAIT_REG_MEM_ENGINE(2))); /* ce */
3763 amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
3764 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
3765 amdgpu_ring_write(ring, 1);
3766 amdgpu_ring_write(ring, 0xffffffff);
3767 amdgpu_ring_write(ring, 4); /* poll interval */
3768
3769 /* instruct CE to reset wb of ce_sync to zero */
3770 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3771 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
3772 WRITE_DATA_DST_SEL(5) |
3773 WR_CONFIRM));
3774 amdgpu_ring_write(ring, gpu_addr & 0xfffffffc);
3775 amdgpu_ring_write(ring, upper_32_bits(gpu_addr) & 0xffffffff);
3776 amdgpu_ring_write(ring, 0);
3777}
3778
3779static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
3780 unsigned vm_id, uint64_t pd_addr)
3781{
3782 int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
3783 u32 srbm_gfx_cntl = 0;
3784
3785 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3786 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
3787 WRITE_DATA_DST_SEL(0)));
3788 if (vm_id < 8) {
3789 amdgpu_ring_write(ring,
3790 (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
3791 } else {
3792 amdgpu_ring_write(ring,
3793 (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
3794 }
3795 amdgpu_ring_write(ring, 0);
3796 amdgpu_ring_write(ring, pd_addr >> 12);
3797
3798 /* update SH_MEM_* regs */
3799 srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, VMID, vm_id);
3800 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3801 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3802 WRITE_DATA_DST_SEL(0)));
3803 amdgpu_ring_write(ring, mmSRBM_GFX_CNTL);
3804 amdgpu_ring_write(ring, 0);
3805 amdgpu_ring_write(ring, srbm_gfx_cntl);
3806
3807 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
3808 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3809 WRITE_DATA_DST_SEL(0)));
3810 amdgpu_ring_write(ring, mmSH_MEM_BASES);
3811 amdgpu_ring_write(ring, 0);
3812
3813 amdgpu_ring_write(ring, 0); /* SH_MEM_BASES */
3814 amdgpu_ring_write(ring, 0); /* SH_MEM_CONFIG */
3815 amdgpu_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
3816 amdgpu_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
3817
3818 srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, VMID, 0);
3819 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3820 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3821 WRITE_DATA_DST_SEL(0)));
3822 amdgpu_ring_write(ring, mmSRBM_GFX_CNTL);
3823 amdgpu_ring_write(ring, 0);
3824 amdgpu_ring_write(ring, srbm_gfx_cntl);
3825
3826
3827 /* bits 0-15 are the VM contexts0-15 */
3828 /* invalidate the cache */
3829 amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3830 amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3831 WRITE_DATA_DST_SEL(0)));
3832 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3833 amdgpu_ring_write(ring, 0);
3834 amdgpu_ring_write(ring, 1 << vm_id);
3835
3836 /* wait for the invalidate to complete */
3837 amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3838 amdgpu_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
3839 WAIT_REG_MEM_FUNCTION(0) | /* always */
3840 WAIT_REG_MEM_ENGINE(0))); /* me */
3841 amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
3842 amdgpu_ring_write(ring, 0);
3843 amdgpu_ring_write(ring, 0); /* ref */
3844 amdgpu_ring_write(ring, 0); /* mask */
3845 amdgpu_ring_write(ring, 0x20); /* poll interval */
3846
3847 /* compute doesn't have PFP */
3848 if (usepfp) {
3849 /* sync PFP to ME, otherwise we might get invalid PFP reads */
3850 amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
3851 amdgpu_ring_write(ring, 0x0);
3852
3853 /* synce CE with ME to prevent CE fetch CEIB before context switch done */
3854 gfx_v8_0_ce_sync_me(ring);
3855 }
3856}
3857
3858static bool gfx_v8_0_ring_is_lockup(struct amdgpu_ring *ring)
3859{
3860 if (gfx_v8_0_is_idle(ring->adev)) {
3861 amdgpu_ring_lockup_update(ring);
3862 return false;
3863 }
3864 return amdgpu_ring_test_lockup(ring);
3865}
3866
3867static u32 gfx_v8_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
3868{
3869 return ring->adev->wb.wb[ring->rptr_offs];
3870}
3871
3872static u32 gfx_v8_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
3873{
3874 return ring->adev->wb.wb[ring->wptr_offs];
3875}
3876
3877static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
3878{
3879 struct amdgpu_device *adev = ring->adev;
3880
3881 /* XXX check if swapping is necessary on BE */
3882 adev->wb.wb[ring->wptr_offs] = ring->wptr;
3883 WDOORBELL32(ring->doorbell_index, ring->wptr);
3884}
3885
3886static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
3887 u64 addr, u64 seq,
3888 bool write64bits)
3889{
3890 /* RELEASE_MEM - flush caches, send int */
3891 amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3892 amdgpu_ring_write(ring, (EOP_TCL1_ACTION_EN |
3893 EOP_TC_ACTION_EN |
3894 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3895 EVENT_INDEX(5)));
3896 amdgpu_ring_write(ring, DATA_SEL(write64bits ? 2 : 1) | INT_SEL(2));
3897 amdgpu_ring_write(ring, addr & 0xfffffffc);
3898 amdgpu_ring_write(ring, upper_32_bits(addr));
3899 amdgpu_ring_write(ring, lower_32_bits(seq));
3900 amdgpu_ring_write(ring, upper_32_bits(seq));
3901}
3902
3903static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
3904 enum amdgpu_interrupt_state state)
3905{
3906 u32 cp_int_cntl;
3907
3908 switch (state) {
3909 case AMDGPU_IRQ_STATE_DISABLE:
3910 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
3911 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3912 TIME_STAMP_INT_ENABLE, 0);
3913 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
3914 break;
3915 case AMDGPU_IRQ_STATE_ENABLE:
3916 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
3917 cp_int_cntl =
3918 REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3919 TIME_STAMP_INT_ENABLE, 1);
3920 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
3921 break;
3922 default:
3923 break;
3924 }
3925}
3926
3927static void gfx_v8_0_set_compute_eop_interrupt_state(struct amdgpu_device *adev,
3928 int me, int pipe,
3929 enum amdgpu_interrupt_state state)
3930{
3931 u32 mec_int_cntl, mec_int_cntl_reg;
3932
3933 /*
3934 * amdgpu controls only pipe 0 of MEC1. That's why this function only
3935 * handles the setting of interrupts for this specific pipe. All other
3936 * pipes' interrupts are set by amdkfd.
3937 */
3938
3939 if (me == 1) {
3940 switch (pipe) {
3941 case 0:
3942 mec_int_cntl_reg = mmCP_ME1_PIPE0_INT_CNTL;
3943 break;
3944 default:
3945 DRM_DEBUG("invalid pipe %d\n", pipe);
3946 return;
3947 }
3948 } else {
3949 DRM_DEBUG("invalid me %d\n", me);
3950 return;
3951 }
3952
3953 switch (state) {
3954 case AMDGPU_IRQ_STATE_DISABLE:
3955 mec_int_cntl = RREG32(mec_int_cntl_reg);
3956 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3957 TIME_STAMP_INT_ENABLE, 0);
3958 WREG32(mec_int_cntl_reg, mec_int_cntl);
3959 break;
3960 case AMDGPU_IRQ_STATE_ENABLE:
3961 mec_int_cntl = RREG32(mec_int_cntl_reg);
3962 mec_int_cntl = REG_SET_FIELD(mec_int_cntl, CP_ME1_PIPE0_INT_CNTL,
3963 TIME_STAMP_INT_ENABLE, 1);
3964 WREG32(mec_int_cntl_reg, mec_int_cntl);
3965 break;
3966 default:
3967 break;
3968 }
3969}
3970
3971static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
3972 struct amdgpu_irq_src *source,
3973 unsigned type,
3974 enum amdgpu_interrupt_state state)
3975{
3976 u32 cp_int_cntl;
3977
3978 switch (state) {
3979 case AMDGPU_IRQ_STATE_DISABLE:
3980 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
3981 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3982 PRIV_REG_INT_ENABLE, 0);
3983 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
3984 break;
3985 case AMDGPU_IRQ_STATE_ENABLE:
3986 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
3987 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
3988 PRIV_REG_INT_ENABLE, 0);
3989 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
3990 break;
3991 default:
3992 break;
3993 }
3994
3995 return 0;
3996}
3997
3998static int gfx_v8_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
3999 struct amdgpu_irq_src *source,
4000 unsigned type,
4001 enum amdgpu_interrupt_state state)
4002{
4003 u32 cp_int_cntl;
4004
4005 switch (state) {
4006 case AMDGPU_IRQ_STATE_DISABLE:
4007 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4008 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4009 PRIV_INSTR_INT_ENABLE, 0);
4010 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4011 break;
4012 case AMDGPU_IRQ_STATE_ENABLE:
4013 cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
4014 cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
4015 PRIV_INSTR_INT_ENABLE, 1);
4016 WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
4017 break;
4018 default:
4019 break;
4020 }
4021
4022 return 0;
4023}
4024
4025static int gfx_v8_0_set_eop_interrupt_state(struct amdgpu_device *adev,
4026 struct amdgpu_irq_src *src,
4027 unsigned type,
4028 enum amdgpu_interrupt_state state)
4029{
4030 switch (type) {
4031 case AMDGPU_CP_IRQ_GFX_EOP:
4032 gfx_v8_0_set_gfx_eop_interrupt_state(adev, state);
4033 break;
4034 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP:
4035 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 0, state);
4036 break;
4037 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP:
4038 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 1, state);
4039 break;
4040 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP:
4041 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 2, state);
4042 break;
4043 case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE3_EOP:
4044 gfx_v8_0_set_compute_eop_interrupt_state(adev, 1, 3, state);
4045 break;
4046 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE0_EOP:
4047 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 0, state);
4048 break;
4049 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE1_EOP:
4050 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 1, state);
4051 break;
4052 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE2_EOP:
4053 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 2, state);
4054 break;
4055 case AMDGPU_CP_IRQ_COMPUTE_MEC2_PIPE3_EOP:
4056 gfx_v8_0_set_compute_eop_interrupt_state(adev, 2, 3, state);
4057 break;
4058 default:
4059 break;
4060 }
4061 return 0;
4062}
4063
4064static int gfx_v8_0_eop_irq(struct amdgpu_device *adev,
4065 struct amdgpu_irq_src *source,
4066 struct amdgpu_iv_entry *entry)
4067{
4068 int i;
4069 u8 me_id, pipe_id, queue_id;
4070 struct amdgpu_ring *ring;
4071
4072 DRM_DEBUG("IH: CP EOP\n");
4073 me_id = (entry->ring_id & 0x0c) >> 2;
4074 pipe_id = (entry->ring_id & 0x03) >> 0;
4075 queue_id = (entry->ring_id & 0x70) >> 4;
4076
4077 switch (me_id) {
4078 case 0:
4079 amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
4080 break;
4081 case 1:
4082 case 2:
4083 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4084 ring = &adev->gfx.compute_ring[i];
4085 /* Per-queue interrupt is supported for MEC starting from VI.
4086 * The interrupt can only be enabled/disabled per pipe instead of per queue.
4087 */
4088 if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id))
4089 amdgpu_fence_process(ring);
4090 }
4091 break;
4092 }
4093 return 0;
4094}
4095
/*
 * Handler for the privileged register fault interrupt: logs an error and
 * schedules adev->reset_work. @source and @entry are unused.
 * Always returns 0.
 */
static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev,
				 struct amdgpu_irq_src *source,
				 struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal register access in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4104
/*
 * Handler for the privileged instruction fault interrupt: logs an error
 * and schedules adev->reset_work. @source and @entry are unused.
 * Always returns 0.
 */
static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
				  struct amdgpu_irq_src *source,
				  struct amdgpu_iv_entry *entry)
{
	DRM_ERROR("Illegal instruction in command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
4113
4114const struct amdgpu_ip_funcs gfx_v8_0_ip_funcs = {
4115 .early_init = gfx_v8_0_early_init,
4116 .late_init = NULL,
4117 .sw_init = gfx_v8_0_sw_init,
4118 .sw_fini = gfx_v8_0_sw_fini,
4119 .hw_init = gfx_v8_0_hw_init,
4120 .hw_fini = gfx_v8_0_hw_fini,
4121 .suspend = gfx_v8_0_suspend,
4122 .resume = gfx_v8_0_resume,
4123 .is_idle = gfx_v8_0_is_idle,
4124 .wait_for_idle = gfx_v8_0_wait_for_idle,
4125 .soft_reset = gfx_v8_0_soft_reset,
4126 .print_status = gfx_v8_0_print_status,
4127 .set_clockgating_state = gfx_v8_0_set_clockgating_state,
4128 .set_powergating_state = gfx_v8_0_set_powergating_state,
4129};
4130
4131static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
4132 .get_rptr = gfx_v8_0_ring_get_rptr_gfx,
4133 .get_wptr = gfx_v8_0_ring_get_wptr_gfx,
4134 .set_wptr = gfx_v8_0_ring_set_wptr_gfx,
4135 .parse_cs = NULL,
4136 .emit_ib = gfx_v8_0_ring_emit_ib,
4137 .emit_fence = gfx_v8_0_ring_emit_fence_gfx,
4138 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
4139 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
4140 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
4141 .test_ring = gfx_v8_0_ring_test_ring,
4142 .test_ib = gfx_v8_0_ring_test_ib,
4143 .is_lockup = gfx_v8_0_ring_is_lockup,
4144};
4145
4146static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
4147 .get_rptr = gfx_v8_0_ring_get_rptr_compute,
4148 .get_wptr = gfx_v8_0_ring_get_wptr_compute,
4149 .set_wptr = gfx_v8_0_ring_set_wptr_compute,
4150 .parse_cs = NULL,
4151 .emit_ib = gfx_v8_0_ring_emit_ib,
4152 .emit_fence = gfx_v8_0_ring_emit_fence_compute,
4153 .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
4154 .emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
4155 .emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
4156 .test_ring = gfx_v8_0_ring_test_ring,
4157 .test_ib = gfx_v8_0_ring_test_ib,
4158 .is_lockup = gfx_v8_0_ring_is_lockup,
4159};
4160
4161static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
4162{
4163 int i;
4164
4165 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
4166 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
4167
4168 for (i = 0; i < adev->gfx.num_compute_rings; i++)
4169 adev->gfx.compute_ring[i].funcs = &gfx_v8_0_ring_funcs_compute;
4170}
4171
4172static const struct amdgpu_irq_src_funcs gfx_v8_0_eop_irq_funcs = {
4173 .set = gfx_v8_0_set_eop_interrupt_state,
4174 .process = gfx_v8_0_eop_irq,
4175};
4176
4177static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_reg_irq_funcs = {
4178 .set = gfx_v8_0_set_priv_reg_fault_state,
4179 .process = gfx_v8_0_priv_reg_irq,
4180};
4181
4182static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
4183 .set = gfx_v8_0_set_priv_inst_fault_state,
4184 .process = gfx_v8_0_priv_inst_irq,
4185};
4186
4187static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
4188{
4189 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
4190 adev->gfx.eop_irq.funcs = &gfx_v8_0_eop_irq_funcs;
4191
4192 adev->gfx.priv_reg_irq.num_types = 1;
4193 adev->gfx.priv_reg_irq.funcs = &gfx_v8_0_priv_reg_irq_funcs;
4194
4195 adev->gfx.priv_inst_irq.num_types = 1;
4196 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
4197}
4198
4199static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
4200{
4201 /* init asci gds info */
4202 adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
4203 adev->gds.gws.total_size = 64;
4204 adev->gds.oa.total_size = 16;
4205
4206 if (adev->gds.mem.total_size == 64 * 1024) {
4207 adev->gds.mem.gfx_partition_size = 4096;
4208 adev->gds.mem.cs_partition_size = 4096;
4209
4210 adev->gds.gws.gfx_partition_size = 4;
4211 adev->gds.gws.cs_partition_size = 4;
4212
4213 adev->gds.oa.gfx_partition_size = 4;
4214 adev->gds.oa.cs_partition_size = 1;
4215 } else {
4216 adev->gds.mem.gfx_partition_size = 1024;
4217 adev->gds.mem.cs_partition_size = 1024;
4218
4219 adev->gds.gws.gfx_partition_size = 16;
4220 adev->gds.gws.cs_partition_size = 16;
4221
4222 adev->gds.oa.gfx_partition_size = 4;
4223 adev->gds.oa.cs_partition_size = 4;
4224 }
4225}
4226
4227static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
4228 u32 se, u32 sh)
4229{
4230 u32 mask = 0, tmp, tmp1;
4231 int i;
4232
4233 gfx_v8_0_select_se_sh(adev, se, sh);
4234 tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
4235 tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
4236 gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
4237
4238 tmp &= 0xffff0000;
4239
4240 tmp |= tmp1;
4241 tmp >>= 16;
4242
4243 for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
4244 mask <<= 1;
4245 mask |= 1;
4246 }
4247
4248 return (~tmp) & mask;
4249}
4250
4251int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
4252 struct amdgpu_cu_info *cu_info)
4253{
4254 int i, j, k, counter, active_cu_number = 0;
4255 u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
4256
4257 if (!adev || !cu_info)
4258 return -EINVAL;
4259
4260 mutex_lock(&adev->grbm_idx_mutex);
4261 for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
4262 for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
4263 mask = 1;
4264 ao_bitmap = 0;
4265 counter = 0;
4266 bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
4267 cu_info->bitmap[i][j] = bitmap;
4268
4269 for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
4270 if (bitmap & mask) {
4271 if (counter < 2)
4272 ao_bitmap |= mask;
4273 counter ++;
4274 }
4275 mask <<= 1;
4276 }
4277 active_cu_number += counter;
4278 ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
4279 }
4280 }
4281
4282 cu_info->number = active_cu_number;
4283 cu_info->ao_cu_mask = ao_cu_mask;
4284 mutex_unlock(&adev->grbm_idx_mutex);
4285 return 0;
4286}