aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
diff options
context:
space:
mode:
authorAlex Deucher <alexander.deucher@amd.com>2015-04-20 17:31:14 -0400
committerAlex Deucher <alexander.deucher@amd.com>2015-06-03 21:03:17 -0400
commitaaa36a976bbb9b02a54c087ff390c0bad1d18e3e (patch)
tree105be3c06ef33c39e6934801d386847950d4ebf9 /drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
parenta2e73f56fa6282481927ec43aa9362c03c2e2104 (diff)
drm/amdgpu: Add initial VI support
This adds initial support for VI asics. This includes Iceland, Tonga, and Carrizo. Our initial focus has been Carrizo, so there are still gaps in support for Tonga and Iceland, notably power management. Acked-by: Christian König <christian.koenig@amd.com> Acked-by: Jammy Zhou <Jammy.Zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c1447
1 files changed, 1447 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
new file mode 100644
index 000000000000..a83029d548c1
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -0,0 +1,1447 @@
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * Authors: Alex Deucher
23 */
24#include <linux/firmware.h>
25#include <drm/drmP.h>
26#include "amdgpu.h"
27#include "amdgpu_ucode.h"
28#include "amdgpu_trace.h"
29#include "vi.h"
30#include "vid.h"
31
32#include "oss/oss_2_4_d.h"
33#include "oss/oss_2_4_sh_mask.h"
34
35#include "gmc/gmc_8_1_d.h"
36#include "gmc/gmc_8_1_sh_mask.h"
37
38#include "gca/gfx_8_0_d.h"
39#include "gca/gfx_8_0_sh_mask.h"
40
41#include "bif/bif_5_0_d.h"
42#include "bif/bif_5_0_sh_mask.h"
43
44#include "iceland_sdma_pkt_open.h"
45
/* Callback-table installers, implemented at the bottom of this file. */
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev);

MODULE_FIRMWARE("radeon/topaz_sdma.bin");
MODULE_FIRMWARE("radeon/topaz_sdma1.bin");

/* MMIO register-bank offset for each SDMA instance (indexed by instance). */
static const u32 sdma_offsets[SDMA_MAX_INSTANCE] =
{
	SDMA0_REGISTER_OFFSET,
	SDMA1_REGISTER_OFFSET
};

/* (register, mask, value) triples consumed by
 * amdgpu_program_register_sequence() in sdma_v2_4_init_golden_registers().
 */
static const u32 golden_settings_iceland_a11[] =
{
	mmSDMA0_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA0_CLK_CTRL, 0xff000fff, 0x00000000,
	mmSDMA1_CHICKEN_BITS, 0xfc910007, 0x00810007,
	mmSDMA1_CLK_CTRL, 0xff000fff, 0x00000000,
};

/* Clock-gating init values, same (register, mask, value) layout. */
static const u32 iceland_mgcg_cgcg_init[] =
{
	mmSDMA0_CLK_CTRL, 0xff000ff0, 0x00000100,
	mmSDMA1_CLK_CTRL, 0xff000ff0, 0x00000100
};
73
74/*
75 * sDMA - System DMA
76 * Starting with CIK, the GPU has new asynchronous
77 * DMA engines. These engines are used for compute
78 * and gfx. There are two DMA engines (SDMA0, SDMA1)
79 * and each one supports 1 ring buffer used for gfx
80 * and 2 queues used for compute.
81 *
82 * The programming model is very similar to the CP
 83 * (ring buffer, IBs, etc.), but sDMA has its own
84 * packet format that is different from the PM4 format
85 * used by the CP. sDMA supports copying data, writing
86 * embedded data, solid fills, and a number of other
87 * things. It also has support for tiling/detiling of
88 * buffers.
89 */
90
91static void sdma_v2_4_init_golden_registers(struct amdgpu_device *adev)
92{
93 switch (adev->asic_type) {
94 case CHIP_TOPAZ:
95 amdgpu_program_register_sequence(adev,
96 iceland_mgcg_cgcg_init,
97 (const u32)ARRAY_SIZE(iceland_mgcg_cgcg_init));
98 amdgpu_program_register_sequence(adev,
99 golden_settings_iceland_a11,
100 (const u32)ARRAY_SIZE(golden_settings_iceland_a11));
101 break;
102 default:
103 break;
104 }
105}
106
/**
 * sdma_v2_4_init_microcode - load ucode images from disk
 *
 * @adev: amdgpu_device pointer
 *
 * Use the firmware interface to load the ucode images into
 * the driver (not loaded into hw).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_init_microcode(struct amdgpu_device *adev)
{
	const char *chip_name;
	char fw_name[30];
	int err, i;
	struct amdgpu_firmware_info *info = NULL;
	const struct common_firmware_header *header = NULL;

	DRM_DEBUG("\n");

	switch (adev->asic_type) {
	case CHIP_TOPAZ:
		chip_name = "topaz";
		break;
	default: BUG(); /* this IP block is only registered for Topaz */
	}

	for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
		/* instance 0 uses <chip>_sdma.bin, instance 1 uses <chip>_sdma1.bin */
		if (i == 0)
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
		else
			snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma1.bin", chip_name);
		err = request_firmware(&adev->sdma[i].fw, fw_name, adev->dev);
		if (err)
			goto out;
		err = amdgpu_ucode_validate(adev->sdma[i].fw);
		if (err)
			goto out;

		/* when the SMU loads the ucode, register this image in the
		 * global firmware table and account for its (page-aligned) size
		 */
		if (adev->firmware.smu_load) {
			info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i];
			info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i;
			info->fw = adev->sdma[i].fw;
			header = (const struct common_firmware_header *)info->fw->data;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE);
		}
	}

out:
	if (err) {
		printk(KERN_ERR
		       "sdma_v2_4: Failed to load firmware \"%s\"\n",
		       fw_name);
		/* drop everything on failure; release_firmware(NULL) is a no-op */
		for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
			release_firmware(adev->sdma[i].fw);
			adev->sdma[i].fw = NULL;
		}
	}
	return err;
}
167
168/**
169 * sdma_v2_4_ring_get_rptr - get the current read pointer
170 *
171 * @ring: amdgpu ring pointer
172 *
173 * Get the current rptr from the hardware (VI+).
174 */
175static uint32_t sdma_v2_4_ring_get_rptr(struct amdgpu_ring *ring)
176{
177 u32 rptr;
178
179 /* XXX check if swapping is necessary on BE */
180 rptr = ring->adev->wb.wb[ring->rptr_offs] >> 2;
181
182 return rptr;
183}
184
/**
 * sdma_v2_4_ring_get_wptr - get the current write pointer
 *
 * @ring: amdgpu ring pointer
 *
 * Get the current wptr from the hardware (VI+).
 */
static uint32_t sdma_v2_4_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	/* select the register bank for the instance this ring belongs to */
	int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1;
	/* register holds a byte address; convert to DW units */
	u32 wptr = RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me]) >> 2;

	return wptr;
}
200
201/**
202 * sdma_v2_4_ring_set_wptr - commit the write pointer
203 *
204 * @ring: amdgpu ring pointer
205 *
206 * Write the wptr back to the hardware (VI+).
207 */
208static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
209{
210 struct amdgpu_device *adev = ring->adev;
211 int me = (ring == &ring->adev->sdma[0].ring) ? 0 : 1;
212
213 WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2);
214}
215
216static void sdma_v2_4_hdp_flush_ring_emit(struct amdgpu_ring *);
217
/**
 * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
 *
 * @ring: amdgpu ring pointer
 * @ib: IB object to schedule
 *
 * Schedule an IB in the DMA ring (VI).
 */
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
				   struct amdgpu_ib *ib)
{
	u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
	/* position after the 5-DW next_rptr write packet emitted below */
	u32 next_rptr = ring->wptr + 5;

	if (ib->flush_hdp_writefifo)
		next_rptr += 6; /* the hdp flush packet adds 6 DWs */

	/* mirror the NOP padding applied to wptr before the INDIRECT
	 * packet below, so next_rptr tracks the same alignment
	 */
	while ((next_rptr & 7) != 2)
		next_rptr++;

	next_rptr += 6; /* and the INDIRECT packet itself */

	/* publish next_rptr to its writeback slot so progress is visible */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(ring->next_rptr_gpu_addr) & 0xfffffffc);
	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, next_rptr);

	if (ib->flush_hdp_writefifo) {
		/* flush HDP */
		sdma_v2_4_hdp_flush_ring_emit(ring);
	}

	/* IB packet must end on a 8 DW boundary */
	while ((ring->wptr & 7) != 2)
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP));
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid));
	/* base must be 32 byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr) & 0xffffffe0);
	amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
	amdgpu_ring_write(ring, ib->length_dw);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0);

}
265
/**
 * sdma_v2_4_hdp_flush_ring_emit - emit an hdp flush on the DMA ring
 *
 * @ring: amdgpu ring pointer
 *
 * Emit an hdp flush packet on the requested DMA ring: a POLL_REGMEM
 * packet with the hdp_flush bit set, polling GPU_HDP_FLUSH_DONE until
 * this engine's done bit equals the reference value.
 */
static void sdma_v2_4_hdp_flush_ring_emit(struct amdgpu_ring *ring)
{
	u32 ref_and_mask = 0;

	/* each SDMA instance has its own done bit in GPU_HDP_FLUSH_DONE */
	if (ring == &ring->adev->sdma[0].ring)
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA0, 1);
	else
		ref_and_mask = REG_SET_FIELD(ref_and_mask, GPU_HDP_FLUSH_DONE, SDMA1, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_DONE << 2);
	amdgpu_ring_write(ring, mmGPU_HDP_FLUSH_REQ << 2);
	amdgpu_ring_write(ring, ref_and_mask); /* reference */
	amdgpu_ring_write(ring, ref_and_mask); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
292
/**
 * sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
 *
 * @ring: amdgpu ring pointer
 * @addr: GPU address to write the fence value to
 * @seq: fence sequence number
 * @write64bits: also write the upper 32 bits of @seq at @addr + 4
 *
 * Add a DMA fence packet to the ring to write
 * the fence seq number and DMA trap packet to generate
 * an interrupt if needed (VI).
 */
static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
				      bool write64bits)
{
	/* write the fence */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	/* optionally write high bits as well */
	if (write64bits) {
		addr += 4;
		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_FENCE));
		amdgpu_ring_write(ring, lower_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(addr));
		amdgpu_ring_write(ring, upper_32_bits(seq));
	}

	/* generate an interrupt */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_TRAP));
	amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
325
/**
 * sdma_v2_4_ring_emit_semaphore - emit a semaphore on the dma ring
 *
 * @ring: amdgpu_ring structure holding ring information
 * @semaphore: amdgpu semaphore object
 * @emit_wait: wait or signal semaphore
 *
 * Add a DMA semaphore packet to the ring wait on or signal
 * other rings (VI).
 * Always returns true (the packet never fails to fit).
 */
static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring,
					  struct amdgpu_semaphore *semaphore,
					  bool emit_wait)
{
	u64 addr = semaphore->gpu_addr;
	/* SIGNAL bit: 1 = signal, 0 = wait */
	u32 sig = emit_wait ? 0 : 1;

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
			  SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
	/* semaphore address must be 8-byte aligned */
	amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
	amdgpu_ring_write(ring, upper_32_bits(addr));

	return true;
}
350
/**
 * sdma_v2_4_gfx_stop - stop the gfx async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the gfx async dma ring buffers (VI).
 */
static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev)
{
	struct amdgpu_ring *sdma0 = &adev->sdma[0].ring;
	struct amdgpu_ring *sdma1 = &adev->sdma[1].ring;
	u32 rb_cntl, ib_cntl;
	int i;

	/* buffer moves are going away; restrict TTM to CPU-visible VRAM */
	if ((adev->mman.buffer_funcs_ring == sdma0) ||
	    (adev->mman.buffer_funcs_ring == sdma1))
		amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);

	/* disable the ring buffer and IB execution on both instances */
	for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);
		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);
	}
	sdma0->ready = false;
	sdma1->ready = false;
}
380
/**
 * sdma_v2_4_rlc_stop - stop the compute async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Stop the compute async dma queues (VI).
 * Currently a stub; compute queue support is not implemented yet.
 */
static void sdma_v2_4_rlc_stop(struct amdgpu_device *adev)
{
	/* XXX todo */
}
392
393/**
394 * sdma_v2_4_enable - stop the async dma engines
395 *
396 * @adev: amdgpu_device pointer
397 * @enable: enable/disable the DMA MEs.
398 *
399 * Halt or unhalt the async dma engines (VI).
400 */
401static void sdma_v2_4_enable(struct amdgpu_device *adev, bool enable)
402{
403 u32 f32_cntl;
404 int i;
405
406 if (enable == false) {
407 sdma_v2_4_gfx_stop(adev);
408 sdma_v2_4_rlc_stop(adev);
409 }
410
411 for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
412 f32_cntl = RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]);
413 if (enable)
414 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 0);
415 else
416 f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
417 WREG32(mmSDMA0_F32_CNTL + sdma_offsets[i], f32_cntl);
418 }
419}
420
/**
 * sdma_v2_4_gfx_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	u32 rb_cntl, ib_cntl;
	u32 rb_bufsz;
	u32 wb_offset;
	int i, j, r;

	for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
		ring = &adev->sdma[i].ring;
		wb_offset = (ring->rptr_offs * 4);

		/* clear the per-VMID gfx virtual address and APE1 settings */
		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			/* SDMA GFX */
			WREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i], 0);
			WREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i], 0);
		}
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);

		WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);

		/* Set ring buffer size in dwords */
		rb_bufsz = order_base_2(ring->ring_size / 4);
		rb_cntl = RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz);
#ifdef __BIG_ENDIAN
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1);
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL,
					RPTR_WRITEBACK_SWAP_ENABLE, 1);
#endif
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		/* Initialize the ring buffer's read and write pointers */
		WREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i], 0);
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], 0);

		/* set the wb address whether it's enabled or not */
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i],
		       upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
		WREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i],
		       lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC);

		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1);

		/* ring base is programmed as addr >> 8 (256-byte units) */
		WREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i], ring->gpu_addr >> 8);
		WREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i], ring->gpu_addr >> 40);

		ring->wptr = 0;
		WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i], ring->wptr << 2);

		/* enable DMA RB */
		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1);
		WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl);

		ib_cntl = RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]);
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1);
#ifdef __BIG_ENDIAN
		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1);
#endif
		/* enable DMA IBs */
		WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl);

		ring->ready = true;

		/* sanity-check the ring before declaring it usable */
		r = amdgpu_ring_test_ring(ring);
		if (r) {
			ring->ready = false;
			return r;
		}

		/* with the move ring up, expose full VRAM to TTM again */
		if (adev->mman.buffer_funcs_ring == ring)
			amdgpu_ttm_set_active_vram_size(adev, adev->mc.real_vram_size);
	}

	return 0;
}
508
/**
 * sdma_v2_4_rlc_resume - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the compute DMA queues and enable them (VI).
 * Currently a stub; compute queue support is not implemented yet.
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev)
{
	/* XXX todo */
	return 0;
}
522
/**
 * sdma_v2_4_load_microcode - load the sDMA ME ucode
 *
 * @adev: amdgpu_device pointer
 *
 * Loads the sDMA0/1 ucode.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int sdma_v2_4_load_microcode(struct amdgpu_device *adev)
{
	const struct sdma_firmware_header_v1_0 *hdr;
	const __le32 *fw_data;
	u32 fw_size;
	int i, j;
	bool smc_loads_fw = false; /* XXX fix me */

	/* both images must have been fetched by init_microcode */
	if (!adev->sdma[0].fw || !adev->sdma[1].fw)
		return -EINVAL;

	/* halt the MEs */
	sdma_v2_4_enable(adev, false);

	if (smc_loads_fw) {
		/* XXX query SMC for fw load complete */
	} else {
		for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
			hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data;
			amdgpu_ucode_print_sdma_hdr(&hdr->header);
			fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
			adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version);

			fw_data = (const __le32 *)
				(adev->sdma[i].fw->data +
				 le32_to_cpu(hdr->header.ucode_array_offset_bytes));
			/* stream the image DW by DW through the UCODE_DATA port */
			WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0);
			for (j = 0; j < fw_size; j++)
				WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++));
			WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma[i].fw_version);
		}
	}

	return 0;
}
566
/**
 * sdma_v2_4_start - setup and start the async dma engines
 *
 * @adev: amdgpu_device pointer
 *
 * Set up the DMA engines and enable them (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_start(struct amdgpu_device *adev)
{
	int r;

	/* either the driver loads the ucode directly, or the SMU does;
	 * in the latter case only verify the load has finished
	 */
	if (!adev->firmware.smu_load) {
		r = sdma_v2_4_load_microcode(adev);
		if (r)
			return r;
	} else {
		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								 AMDGPU_UCODE_ID_SDMA0);
		if (r)
			return -EINVAL;
		r = adev->smu.smumgr_funcs->check_fw_load_finish(adev,
								 AMDGPU_UCODE_ID_SDMA1);
		if (r)
			return -EINVAL;
	}

	/* unhalt the MEs */
	sdma_v2_4_enable(adev, true);

	/* start the gfx rings and rlc compute queues */
	r = sdma_v2_4_gfx_resume(adev);
	if (r)
		return r;
	r = sdma_v2_4_rlc_resume(adev);
	if (r)
		return r;

	return 0;
}
607
/**
 * sdma_v2_4_ring_test_ring - simple async dma engine test
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test the DMA engine by using it to write a value to memory (VI).
 * Returns 0 for success, error for failure.
 */
static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp;
	u64 gpu_addr;

	/* grab a writeback slot for the engine to write into */
	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD; /* poison value; overwritten on success */
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ring_lock(ring, 5);
	if (r) {
		DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
		amdgpu_wb_free(adev, index);
		return r;
	}

	/* one-DW WRITE_LINEAR packet: write 0xDEADBEEF to the wb slot */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
			  SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR));
	amdgpu_ring_write(ring, lower_32_bits(gpu_addr));
	amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
	amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_unlock_commit(ring);

	/* busy-wait for the value to land in memory */
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}

	if (i < adev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
			  ring->idx, tmp);
		r = -EINVAL;
	}
	amdgpu_wb_free(adev, index);

	return r;
}
669
/**
 * sdma_v2_4_ring_test_ib - test an IB on the DMA engine
 *
 * @ring: amdgpu_ring structure holding ring information
 *
 * Test a simple IB in the DMA ring (VI).
 * Returns 0 on success, error on failure.
 */
static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	struct amdgpu_ib ib;
	unsigned i;
	unsigned index;
	int r;
	u32 tmp = 0;
	u64 gpu_addr;

	/* grab a writeback slot for the IB to write into */
	r = amdgpu_wb_get(adev, &index);
	if (r) {
		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
		return r;
	}

	gpu_addr = adev->wb.gpu_addr + (index * 4);
	tmp = 0xCAFEDEAD; /* poison value; overwritten on success */
	adev->wb.wb[index] = cpu_to_le32(tmp);

	r = amdgpu_ib_get(ring, NULL, 256, &ib);
	if (r) {
		amdgpu_wb_free(adev, index);
		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
		return r;
	}

	/* WRITE_LINEAR of 0xDEADBEEF, NOP-padded to 8 DWs */
	ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
		SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR);
	ib.ptr[1] = lower_32_bits(gpu_addr);
	ib.ptr[2] = upper_32_bits(gpu_addr);
	ib.ptr[3] = SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1);
	ib.ptr[4] = 0xDEADBEEF;
	ib.ptr[5] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[6] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
	ib.length_dw = 8;

	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED);
	if (r) {
		amdgpu_ib_free(adev, &ib);
		amdgpu_wb_free(adev, index);
		DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* wait for the IB's fence before checking the result */
	r = amdgpu_fence_wait(ib.fence, false);
	if (r) {
		amdgpu_ib_free(adev, &ib);
		amdgpu_wb_free(adev, index);
		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
		return r;
	}
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = le32_to_cpu(adev->wb.wb[index]);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < adev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
			 ib.fence->ring->idx, i);
	} else {
		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
		r = -EINVAL;
	}
	amdgpu_ib_free(adev, &ib);
	amdgpu_wb_free(adev, index);
	return r;
}
747
748/**
749 * sdma_v2_4_vm_copy_pte - update PTEs by copying them from the GART
750 *
751 * @ib: indirect buffer to fill with commands
752 * @pe: addr of the page entry
753 * @src: src addr to copy from
754 * @count: number of page entries to update
755 *
756 * Update PTEs by copying them from the GART using sDMA (CIK).
757 */
758static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
759 uint64_t pe, uint64_t src,
760 unsigned count)
761{
762 while (count) {
763 unsigned bytes = count * 8;
764 if (bytes > 0x1FFFF8)
765 bytes = 0x1FFFF8;
766
767 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
768 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
769 ib->ptr[ib->length_dw++] = bytes;
770 ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */
771 ib->ptr[ib->length_dw++] = lower_32_bits(src);
772 ib->ptr[ib->length_dw++] = upper_32_bits(src);
773 ib->ptr[ib->length_dw++] = lower_32_bits(pe);
774 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
775
776 pe += bytes;
777 src += bytes;
778 count -= bytes / 8;
779 }
780}
781
782/**
783 * sdma_v2_4_vm_write_pte - update PTEs by writing them manually
784 *
785 * @ib: indirect buffer to fill with commands
786 * @pe: addr of the page entry
787 * @addr: dst addr to write into pe
788 * @count: number of page entries to update
789 * @incr: increase next addr by incr bytes
790 * @flags: access flags
791 *
792 * Update PTEs by writing them manually using sDMA (CIK).
793 */
794static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
795 uint64_t pe,
796 uint64_t addr, unsigned count,
797 uint32_t incr, uint32_t flags)
798{
799 uint64_t value;
800 unsigned ndw;
801
802 while (count) {
803 ndw = count * 2;
804 if (ndw > 0xFFFFE)
805 ndw = 0xFFFFE;
806
807 /* for non-physically contiguous pages (system) */
808 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) |
809 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR);
810 ib->ptr[ib->length_dw++] = pe;
811 ib->ptr[ib->length_dw++] = upper_32_bits(pe);
812 ib->ptr[ib->length_dw++] = ndw;
813 for (; ndw > 0; ndw -= 2, --count, pe += 8) {
814 if (flags & AMDGPU_PTE_SYSTEM) {
815 value = amdgpu_vm_map_gart(ib->ring->adev, addr);
816 value &= 0xFFFFFFFFFFFFF000ULL;
817 } else if (flags & AMDGPU_PTE_VALID) {
818 value = addr;
819 } else {
820 value = 0;
821 }
822 addr += incr;
823 value |= flags;
824 ib->ptr[ib->length_dw++] = value;
825 ib->ptr[ib->length_dw++] = upper_32_bits(value);
826 }
827 }
828}
829
/**
 * sdma_v2_4_vm_set_pte_pde - update the page tables using sDMA
 *
 * @ib: indirect buffer to fill with commands
 * @pe: addr of the page entry
 * @addr: dst addr to write into pe
 * @count: number of page entries to update
 * @incr: increase next addr by incr bytes
 * @flags: access flags
 *
 * Update the page tables using sDMA (CIK).
 */
static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
				     uint64_t pe,
				     uint64_t addr, unsigned count,
				     uint32_t incr, uint32_t flags)
{
	uint64_t value;
	unsigned ndw;

	while (count) {
		/* one entry per iteration, capped at the packet's count field */
		ndw = count;
		if (ndw > 0x7FFFF)
			ndw = 0x7FFFF;

		if (flags & AMDGPU_PTE_VALID)
			value = addr;
		else
			value = 0;

		/* for physically contiguous pages (vram) */
		ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_GEN_PTEPDE);
		ib->ptr[ib->length_dw++] = pe; /* dst addr */
		ib->ptr[ib->length_dw++] = upper_32_bits(pe);
		ib->ptr[ib->length_dw++] = flags; /* mask */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = value; /* value */
		ib->ptr[ib->length_dw++] = upper_32_bits(value);
		ib->ptr[ib->length_dw++] = incr; /* increment size */
		ib->ptr[ib->length_dw++] = 0;
		ib->ptr[ib->length_dw++] = ndw; /* number of entries */

		pe += ndw * 8;
		addr += ndw * incr;
		count -= ndw;
	}
}
877
878/**
879 * sdma_v2_4_vm_pad_ib - pad the IB to the required number of dw
880 *
881 * @ib: indirect buffer to fill with padding
882 *
883 */
884static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
885{
886 while (ib->length_dw & 0x7)
887 ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
888}
889
/**
 * sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
 *
 * @ring: amdgpu_ring pointer
 * @vm_id: VMID whose page table is being updated
 * @pd_addr: GPU address of the new page directory
 *
 * Update the page table base and flush the VM TLB
 * using sDMA (VI).
 */
static void sdma_v2_4_ring_emit_vm_flush(struct amdgpu_ring *ring,
					 unsigned vm_id, uint64_t pd_addr)
{
	u32 srbm_gfx_cntl = 0;

	/* program the page directory base for this VMID; contexts 0-7 and
	 * 8-15 live in two separate register banks
	 */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	if (vm_id < 8) {
		amdgpu_ring_write(ring, (mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + vm_id));
	} else {
		amdgpu_ring_write(ring, (mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vm_id - 8));
	}
	amdgpu_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID's bank via SRBM_GFX_CNTL */
	srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, VMID, vm_id);
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmSRBM_GFX_CNTL);
	amdgpu_ring_write(ring, srbm_gfx_cntl);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmSH_MEM_BASES);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmSH_MEM_CONFIG);
	amdgpu_ring_write(ring, 0);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmSH_MEM_APE1_BASE);
	amdgpu_ring_write(ring, 1);

	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmSH_MEM_APE1_LIMIT);
	amdgpu_ring_write(ring, 0);

	/* switch back to VMID 0 */
	srbm_gfx_cntl = REG_SET_FIELD(srbm_gfx_cntl, SRBM_GFX_CNTL, VMID, 0);
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmSRBM_GFX_CNTL);
	amdgpu_ring_write(ring, srbm_gfx_cntl);


	/* flush TLB */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
			  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST);
	amdgpu_ring_write(ring, 1 << vm_id);

	/* wait for flush */
	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(0)); /* always */
	amdgpu_ring_write(ring, mmVM_INVALIDATE_REQUEST << 2);
	amdgpu_ring_write(ring, 0);
	amdgpu_ring_write(ring, 0); /* reference */
	amdgpu_ring_write(ring, 0); /* mask */
	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
			  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
964
/* Install the ring/buffer/VM-PTE/IRQ callback tables before hw init runs. */
static int sdma_v2_4_early_init(struct amdgpu_device *adev)
{
	sdma_v2_4_set_ring_funcs(adev);
	sdma_v2_4_set_buffer_funcs(adev);
	sdma_v2_4_set_vm_pte_funcs(adev);
	sdma_v2_4_set_irq_funcs(adev);

	return 0;
}
974
975static int sdma_v2_4_sw_init(struct amdgpu_device *adev)
976{
977 struct amdgpu_ring *ring;
978 int r;
979
980 /* SDMA trap event */
981 r = amdgpu_irq_add_id(adev, 224, &adev->sdma_trap_irq);
982 if (r)
983 return r;
984
985 /* SDMA Privileged inst */
986 r = amdgpu_irq_add_id(adev, 241, &adev->sdma_illegal_inst_irq);
987 if (r)
988 return r;
989
990 /* SDMA Privileged inst */
991 r = amdgpu_irq_add_id(adev, 247, &adev->sdma_illegal_inst_irq);
992 if (r)
993 return r;
994
995 r = sdma_v2_4_init_microcode(adev);
996 if (r) {
997 DRM_ERROR("Failed to load sdma firmware!\n");
998 return r;
999 }
1000
1001 ring = &adev->sdma[0].ring;
1002 ring->ring_obj = NULL;
1003 ring->use_doorbell = false;
1004
1005 ring = &adev->sdma[1].ring;
1006 ring->ring_obj = NULL;
1007 ring->use_doorbell = false;
1008
1009 ring = &adev->sdma[0].ring;
1010 sprintf(ring->name, "sdma0");
1011 r = amdgpu_ring_init(adev, ring, 256 * 1024,
1012 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1013 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP0,
1014 AMDGPU_RING_TYPE_SDMA);
1015 if (r)
1016 return r;
1017
1018 ring = &adev->sdma[1].ring;
1019 sprintf(ring->name, "sdma1");
1020 r = amdgpu_ring_init(adev, ring, 256 * 1024,
1021 SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), 0xf,
1022 &adev->sdma_trap_irq, AMDGPU_SDMA_IRQ_TRAP1,
1023 AMDGPU_RING_TYPE_SDMA);
1024 if (r)
1025 return r;
1026
1027 return r;
1028}
1029
1030static int sdma_v2_4_sw_fini(struct amdgpu_device *adev)
1031{
1032 amdgpu_ring_fini(&adev->sdma[0].ring);
1033 amdgpu_ring_fini(&adev->sdma[1].ring);
1034
1035 return 0;
1036}
1037
static int sdma_v2_4_hw_init(struct amdgpu_device *adev)
{
	/* apply the per-asic golden register settings before bringing
	 * the engines up
	 */
	sdma_v2_4_init_golden_registers(adev);

	/* start both SDMA instances and their rings */
	return sdma_v2_4_start(adev);
}
1050
static int sdma_v2_4_hw_fini(struct amdgpu_device *adev)
{
	/* stop the SDMA engines; ring memory itself is freed in sw_fini */
	sdma_v2_4_enable(adev, false);

	return 0;
}
1057
static int sdma_v2_4_suspend(struct amdgpu_device *adev)
{

	/* suspend only needs the hw teardown path */
	return sdma_v2_4_hw_fini(adev);
}
1063
static int sdma_v2_4_resume(struct amdgpu_device *adev)
{

	/* resume is just a full hw re-init */
	return sdma_v2_4_hw_init(adev);
}
1069
1070static bool sdma_v2_4_is_idle(struct amdgpu_device *adev)
1071{
1072 u32 tmp = RREG32(mmSRBM_STATUS2);
1073
1074 if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK |
1075 SRBM_STATUS2__SDMA1_BUSY_MASK))
1076 return false;
1077
1078 return true;
1079}
1080
1081static int sdma_v2_4_wait_for_idle(struct amdgpu_device *adev)
1082{
1083 unsigned i;
1084 u32 tmp;
1085
1086 for (i = 0; i < adev->usec_timeout; i++) {
1087 tmp = RREG32(mmSRBM_STATUS2) & (SRBM_STATUS2__SDMA_BUSY_MASK |
1088 SRBM_STATUS2__SDMA1_BUSY_MASK);
1089
1090 if (!tmp)
1091 return 0;
1092 udelay(1);
1093 }
1094 return -ETIMEDOUT;
1095}
1096
/**
 * sdma_v2_4_print_status - dump the SDMA register state
 *
 * @adev: amdgpu_device pointer
 *
 * Dump the global SRBM status plus the control, ring-buffer and
 * per-VMID registers of each SDMA instance to the kernel log.
 * Used for debugging around soft reset.
 */
static void sdma_v2_4_print_status(struct amdgpu_device *adev)
{
	int i, j;

	dev_info(adev->dev, "VI SDMA registers\n");
	dev_info(adev->dev, "  SRBM_STATUS2=0x%08X\n",
		 RREG32(mmSRBM_STATUS2));
	for (i = 0; i < SDMA_MAX_INSTANCE; i++) {
		dev_info(adev->dev, "  SDMA%d_STATUS_REG=0x%08X\n",
			 i, RREG32(mmSDMA0_STATUS_REG + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_F32_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_F32_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_SEM_WAIT_FAIL_TIMER_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_IB_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_CNTL=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_WPTR=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_HI=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_HI + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_RPTR_ADDR_LO=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_RPTR_ADDR_LO + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
		dev_info(adev->dev, "  SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
			 i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
		/* the per-VMID registers are banked via SRBM; serialize the
		 * bank select with other SRBM users
		 */
		mutex_lock(&adev->srbm_mutex);
		for (j = 0; j < 16; j++) {
			vi_srbm_select(adev, 0, 0, 0, j);
			dev_info(adev->dev, "  VM %d:\n", j);
			dev_info(adev->dev, "  SDMA%d_GFX_VIRTUAL_ADDR=0x%08X\n",
				 i, RREG32(mmSDMA0_GFX_VIRTUAL_ADDR + sdma_offsets[i]));
			dev_info(adev->dev, "  SDMA%d_GFX_APE1_CNTL=0x%08X\n",
				 i, RREG32(mmSDMA0_GFX_APE1_CNTL + sdma_offsets[i]));
		}
		/* restore the default SRBM bank selection */
		vi_srbm_select(adev, 0, 0, 0, 0);
		mutex_unlock(&adev->srbm_mutex);
	}
}
1142
1143static int sdma_v2_4_soft_reset(struct amdgpu_device *adev)
1144{
1145 u32 srbm_soft_reset = 0;
1146 u32 tmp = RREG32(mmSRBM_STATUS2);
1147
1148 if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK) {
1149 /* sdma0 */
1150 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
1151 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
1152 WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
1153 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
1154 }
1155 if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK) {
1156 /* sdma1 */
1157 tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
1158 tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 0);
1159 WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
1160 srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
1161 }
1162
1163 if (srbm_soft_reset) {
1164 sdma_v2_4_print_status(adev);
1165
1166 tmp = RREG32(mmSRBM_SOFT_RESET);
1167 tmp |= srbm_soft_reset;
1168 dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
1169 WREG32(mmSRBM_SOFT_RESET, tmp);
1170 tmp = RREG32(mmSRBM_SOFT_RESET);
1171
1172 udelay(50);
1173
1174 tmp &= ~srbm_soft_reset;
1175 WREG32(mmSRBM_SOFT_RESET, tmp);
1176 tmp = RREG32(mmSRBM_SOFT_RESET);
1177
1178 /* Wait a little for things to settle down */
1179 udelay(50);
1180
1181 sdma_v2_4_print_status(adev);
1182 }
1183
1184 return 0;
1185}
1186
1187static int sdma_v2_4_set_trap_irq_state(struct amdgpu_device *adev,
1188 struct amdgpu_irq_src *src,
1189 unsigned type,
1190 enum amdgpu_interrupt_state state)
1191{
1192 u32 sdma_cntl;
1193
1194 switch (type) {
1195 case AMDGPU_SDMA_IRQ_TRAP0:
1196 switch (state) {
1197 case AMDGPU_IRQ_STATE_DISABLE:
1198 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
1199 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
1200 WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
1201 break;
1202 case AMDGPU_IRQ_STATE_ENABLE:
1203 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET);
1204 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
1205 WREG32(mmSDMA0_CNTL + SDMA0_REGISTER_OFFSET, sdma_cntl);
1206 break;
1207 default:
1208 break;
1209 }
1210 break;
1211 case AMDGPU_SDMA_IRQ_TRAP1:
1212 switch (state) {
1213 case AMDGPU_IRQ_STATE_DISABLE:
1214 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
1215 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 0);
1216 WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
1217 break;
1218 case AMDGPU_IRQ_STATE_ENABLE:
1219 sdma_cntl = RREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET);
1220 sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, 1);
1221 WREG32(mmSDMA0_CNTL + SDMA1_REGISTER_OFFSET, sdma_cntl);
1222 break;
1223 default:
1224 break;
1225 }
1226 break;
1227 default:
1228 break;
1229 }
1230 return 0;
1231}
1232
1233static int sdma_v2_4_process_trap_irq(struct amdgpu_device *adev,
1234 struct amdgpu_irq_src *source,
1235 struct amdgpu_iv_entry *entry)
1236{
1237 u8 instance_id, queue_id;
1238
1239 instance_id = (entry->ring_id & 0x3) >> 0;
1240 queue_id = (entry->ring_id & 0xc) >> 2;
1241 DRM_DEBUG("IH: SDMA trap\n");
1242 switch (instance_id) {
1243 case 0:
1244 switch (queue_id) {
1245 case 0:
1246 amdgpu_fence_process(&adev->sdma[0].ring);
1247 break;
1248 case 1:
1249 /* XXX compute */
1250 break;
1251 case 2:
1252 /* XXX compute */
1253 break;
1254 }
1255 break;
1256 case 1:
1257 switch (queue_id) {
1258 case 0:
1259 amdgpu_fence_process(&adev->sdma[1].ring);
1260 break;
1261 case 1:
1262 /* XXX compute */
1263 break;
1264 case 2:
1265 /* XXX compute */
1266 break;
1267 }
1268 break;
1269 }
1270 return 0;
1271}
1272
static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev,
					      struct amdgpu_irq_src *source,
					      struct amdgpu_iv_entry *entry)
{
	/* an illegal instruction means the engine is wedged: log it and
	 * kick off an asynchronous GPU reset
	 */
	DRM_ERROR("Illegal instruction in SDMA command stream\n");
	schedule_work(&adev->reset_work);
	return 0;
}
1281
static int sdma_v2_4_set_clockgating_state(struct amdgpu_device *adev,
					   enum amdgpu_clockgating_state state)
{
	/* XXX handled via the smc on VI */

	return 0;
}
1289
static int sdma_v2_4_set_powergating_state(struct amdgpu_device *adev,
					   enum amdgpu_powergating_state state)
{
	/* no SDMA powergating control implemented for this asic */
	return 0;
}
1295
/* IP block callbacks wired into the amdgpu device init/suspend/reset
 * state machine for SDMA v2.4
 */
const struct amdgpu_ip_funcs sdma_v2_4_ip_funcs = {
	.early_init = sdma_v2_4_early_init,
	.late_init = NULL,
	.sw_init = sdma_v2_4_sw_init,
	.sw_fini = sdma_v2_4_sw_fini,
	.hw_init = sdma_v2_4_hw_init,
	.hw_fini = sdma_v2_4_hw_fini,
	.suspend = sdma_v2_4_suspend,
	.resume = sdma_v2_4_resume,
	.is_idle = sdma_v2_4_is_idle,
	.wait_for_idle = sdma_v2_4_wait_for_idle,
	.soft_reset = sdma_v2_4_soft_reset,
	.print_status = sdma_v2_4_print_status,
	.set_clockgating_state = sdma_v2_4_set_clockgating_state,
	.set_powergating_state = sdma_v2_4_set_powergating_state,
};
1312
1313/**
1314 * sdma_v2_4_ring_is_lockup - Check if the DMA engine is locked up
1315 *
1316 * @ring: amdgpu_ring structure holding ring information
1317 *
1318 * Check if the async DMA engine is locked up (VI).
1319 * Returns true if the engine appears to be locked up, false if not.
1320 */
1321static bool sdma_v2_4_ring_is_lockup(struct amdgpu_ring *ring)
1322{
1323
1324 if (sdma_v2_4_is_idle(ring->adev)) {
1325 amdgpu_ring_lockup_update(ring);
1326 return false;
1327 }
1328 return amdgpu_ring_test_lockup(ring);
1329}
1330
/* ring callbacks shared by both SDMA instances; no CS parsing is
 * required for SDMA rings (parse_cs = NULL)
 */
static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
	.get_rptr = sdma_v2_4_ring_get_rptr,
	.get_wptr = sdma_v2_4_ring_get_wptr,
	.set_wptr = sdma_v2_4_ring_set_wptr,
	.parse_cs = NULL,
	.emit_ib = sdma_v2_4_ring_emit_ib,
	.emit_fence = sdma_v2_4_ring_emit_fence,
	.emit_semaphore = sdma_v2_4_ring_emit_semaphore,
	.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
	.test_ring = sdma_v2_4_ring_test_ring,
	.test_ib = sdma_v2_4_ring_test_ib,
	.is_lockup = sdma_v2_4_ring_is_lockup,
};
1344
1345static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
1346{
1347 adev->sdma[0].ring.funcs = &sdma_v2_4_ring_funcs;
1348 adev->sdma[1].ring.funcs = &sdma_v2_4_ring_funcs;
1349}
1350
/* trap irq: programmable enable state plus a fence-processing handler */
static const struct amdgpu_irq_src_funcs sdma_v2_4_trap_irq_funcs = {
	.set = sdma_v2_4_set_trap_irq_state,
	.process = sdma_v2_4_process_trap_irq,
};

/* illegal instruction irq: handler only, no enable/disable control */
static const struct amdgpu_irq_src_funcs sdma_v2_4_illegal_inst_irq_funcs = {
	.process = sdma_v2_4_process_illegal_inst_irq,
};
1359
static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev)
{
	/* one trap irq type per instance (TRAP0/TRAP1) */
	adev->sdma_trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
	adev->sdma_trap_irq.funcs = &sdma_v2_4_trap_irq_funcs;
	adev->sdma_illegal_inst_irq.funcs = &sdma_v2_4_illegal_inst_irq_funcs;
}
1366
1367/**
1368 * sdma_v2_4_emit_copy_buffer - copy buffer using the sDMA engine
1369 *
1370 * @ring: amdgpu_ring structure holding ring information
1371 * @src_offset: src GPU address
1372 * @dst_offset: dst GPU address
1373 * @byte_count: number of bytes to xfer
1374 *
1375 * Copy GPU buffers using the DMA engine (VI).
1376 * Used by the amdgpu ttm implementation to move pages if
1377 * registered as the asic copy callback.
1378 */
1379static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring,
1380 uint64_t src_offset,
1381 uint64_t dst_offset,
1382 uint32_t byte_count)
1383{
1384 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) |
1385 SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR));
1386 amdgpu_ring_write(ring, byte_count);
1387 amdgpu_ring_write(ring, 0); /* src/dst endian swap */
1388 amdgpu_ring_write(ring, lower_32_bits(src_offset));
1389 amdgpu_ring_write(ring, upper_32_bits(src_offset));
1390 amdgpu_ring_write(ring, lower_32_bits(dst_offset));
1391 amdgpu_ring_write(ring, upper_32_bits(dst_offset));
1392}
1393
1394/**
1395 * sdma_v2_4_emit_fill_buffer - fill buffer using the sDMA engine
1396 *
1397 * @ring: amdgpu_ring structure holding ring information
1398 * @src_data: value to write to buffer
1399 * @dst_offset: dst GPU address
1400 * @byte_count: number of bytes to xfer
1401 *
1402 * Fill GPU buffers using the DMA engine (VI).
1403 */
1404static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring,
1405 uint32_t src_data,
1406 uint64_t dst_offset,
1407 uint32_t byte_count)
1408{
1409 amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL));
1410 amdgpu_ring_write(ring, lower_32_bits(dst_offset));
1411 amdgpu_ring_write(ring, upper_32_bits(dst_offset));
1412 amdgpu_ring_write(ring, src_data);
1413 amdgpu_ring_write(ring, byte_count);
1414}
1415
/* buffer-move callbacks exposed to TTM; max_bytes bounds a single
 * packet, num_dw is the dword cost of the matching emit function
 */
static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = {
	.copy_max_bytes = 0x1fffff,
	.copy_num_dw = 7,
	.emit_copy_buffer = sdma_v2_4_emit_copy_buffer,

	.fill_max_bytes = 0x1fffff,
	.fill_num_dw = 7,
	.emit_fill_buffer = sdma_v2_4_emit_fill_buffer,
};
1425
1426static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
1427{
1428 if (adev->mman.buffer_funcs == NULL) {
1429 adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs;
1430 adev->mman.buffer_funcs_ring = &adev->sdma[0].ring;
1431 }
1432}
1433
/* VM page-table update callbacks backed by the SDMA engine */
static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
	.copy_pte = sdma_v2_4_vm_copy_pte,
	.write_pte = sdma_v2_4_vm_write_pte,
	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
	.pad_ib = sdma_v2_4_vm_pad_ib,
};
1440
1441static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
1442{
1443 if (adev->vm_manager.vm_pte_funcs == NULL) {
1444 adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
1445 adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring;
1446 }
1447}