path: root/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
author		Alex Deucher <alexander.deucher@amd.com>	2017-04-17 17:51:00 -0400
committer	Alex Deucher <alexander.deucher@amd.com>	2017-05-24 18:33:46 -0400
commit		e30a52232cbb6f883056aec06abf00cd917c83e8
tree		0699ae4ed606e97dbb99078bd87f3e2c36e98970 /drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
parent		ba0c19f5f000d49efc98c0a4dafbd7266a5a20bc
drm/amdgpu/gfx9: Switch baremetal to use KIQ for compute ring management. (v4)
KIQ is the Kernel Interface Queue for managing the MEC. Rather than
setting up rings via direct MMIO of ring registers, the rings are
configured via special packets sent to the KIQ. This allows the MEC to
better manage shared resources and certain power events. It also reduces
the number of code paths the driver has to support and is required for
MEC powergating.

v2: drop gfx_v9_0_cp_compute_fini() as well
v3: rebase on latest changes derived from gfx8, add unmap queues on
    hw_fini
v4: fix copy/paste typo in error message (Rex)

Acked-by: Tom St Denis <tom.stdenis@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
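[Editor's note] For illustration only, here is a minimal, stand-alone user-space sketch of the nine-dword KIQ submission that the new gfx_v9_0_kiq_kcq_disable() builds in this patch: an UNMAP_QUEUES packet that resets all compute queues, followed by a SET_UCONFIG_REG write of 0xDEADBEEF to a scratch register that the CPU then polls for completion. The PM4 header encoder, the opcode values, the field shifts, and the scratch offset below are simplified placeholders, not the driver's real definitions; in the driver the dwords are emitted through amdgpu_ring_write() on the KIQ ring, as shown in the diff further down.

/*
 * Sketch of the dword stream sent to the KIQ to tear down the compute
 * queues.  All numeric constants are illustrative placeholders.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* simplified PM4 type-3 header: type in [31:30], count in [29:16], opcode in [15:8] */
#define PM4_TYPE3_HDR(op, cnt)  ((3u << 30) | (((cnt) & 0x3fffu) << 16) | (((op) & 0xffu) << 8))

#define OP_UNMAP_QUEUES         0xA3u  /* placeholder opcode */
#define OP_SET_UCONFIG_REG      0x79u  /* placeholder opcode */
#define UNMAP_ACTION(x)         ((x) << 0)  /* 1 = RESET_QUEUES (placeholder shift) */
#define UNMAP_QUEUE_SEL(x)      ((x) << 4)  /* 2 = select all queues (placeholder shift) */
#define SCRATCH_REG_OFFSET      0x0040u     /* placeholder scratch register offset */

int main(void)
{
	uint32_t ib[9];
	unsigned int i, n = 0;

	/* unmap (reset) every compute queue the KIQ manages */
	ib[n++] = PM4_TYPE3_HDR(OP_UNMAP_QUEUES, 4);
	ib[n++] = UNMAP_ACTION(1) | UNMAP_QUEUE_SEL(2);
	ib[n++] = 0;
	ib[n++] = 0;
	ib[n++] = 0;
	ib[n++] = 0;

	/* completion fence: write 0xDEADBEEF to a scratch register polled by the CPU */
	ib[n++] = PM4_TYPE3_HDR(OP_SET_UCONFIG_REG, 1);
	ib[n++] = SCRATCH_REG_OFFSET;
	ib[n++] = 0xDEADBEEF;

	for (i = 0; i < n; i++)
		printf("dw[%u] = 0x%08" PRIx32 "\n", i, ib[i]);
	return 0;
}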
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c	374
1 file changed, 81 insertions, 293 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 6c6644aeb612..503ca4fb3e56 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1427,23 +1427,21 @@ static int gfx_v9_0_sw_init(void *handle)
 			return r;
 	}
 
-	if (amdgpu_sriov_vf(adev)) {
-		r = gfx_v9_0_kiq_init(adev);
-		if (r) {
-			DRM_ERROR("Failed to init KIQ BOs!\n");
-			return r;
-		}
+	r = gfx_v9_0_kiq_init(adev);
+	if (r) {
+		DRM_ERROR("Failed to init KIQ BOs!\n");
+		return r;
+	}
 
-		kiq = &adev->gfx.kiq;
-		r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
-		if (r)
-			return r;
+	kiq = &adev->gfx.kiq;
+	r = gfx_v9_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
+	if (r)
+		return r;
 
-		/* create MQD for all compute queues as wel as KIQ for SRIOV case */
-		r = gfx_v9_0_compute_mqd_sw_init(adev);
-		if (r)
-			return r;
-	}
+	/* create MQD for all compute queues as wel as KIQ for SRIOV case */
+	r = gfx_v9_0_compute_mqd_sw_init(adev);
+	if (r)
+		return r;
 
 	/* reserve GDS, GWS and OA resource for gfx */
 	r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
@@ -1490,11 +1488,9 @@ static int gfx_v9_0_sw_fini(void *handle)
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
 
-	if (amdgpu_sriov_vf(adev)) {
-		gfx_v9_0_compute_mqd_sw_fini(adev);
-		gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
-		gfx_v9_0_kiq_fini(adev);
-	}
+	gfx_v9_0_compute_mqd_sw_fini(adev);
+	gfx_v9_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
+	gfx_v9_0_kiq_fini(adev);
 
 	gfx_v9_0_mec_fini(adev);
 	gfx_v9_0_ngg_fini(adev);
@@ -2387,13 +2383,6 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 	udelay(50);
 }
 
-static int gfx_v9_0_cp_compute_start(struct amdgpu_device *adev)
-{
-	gfx_v9_0_cp_compute_enable(adev, true);
-
-	return 0;
-}
-
 static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 {
 	const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -2436,45 +2425,6 @@ static int gfx_v9_0_cp_compute_load_microcode(struct amdgpu_device *adev)
 	return 0;
 }
 
-static void gfx_v9_0_cp_compute_fini(struct amdgpu_device *adev)
-{
-	int i, r;
-
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-
-		if (ring->mqd_obj) {
-			r = amdgpu_bo_reserve(ring->mqd_obj, true);
-			if (unlikely(r != 0))
-				dev_warn(adev->dev, "(%d) reserve MQD bo failed\n", r);
-
-			amdgpu_bo_unpin(ring->mqd_obj);
-			amdgpu_bo_unreserve(ring->mqd_obj);
-
-			amdgpu_bo_unref(&ring->mqd_obj);
-			ring->mqd_obj = NULL;
-		}
-	}
-}
-
-static int gfx_v9_0_init_queue(struct amdgpu_ring *ring);
-
-static int gfx_v9_0_cp_compute_resume(struct amdgpu_device *adev)
-{
-	int i, r;
-	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
-		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
-		if (gfx_v9_0_init_queue(ring))
-			dev_warn(adev->dev, "compute queue %d init failed!\n", i);
-	}
-
-	r = gfx_v9_0_cp_compute_start(adev);
-	if (r)
-		return r;
-
-	return 0;
-}
-
 /* KIQ functions */
 static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring)
 {
@@ -2565,6 +2515,56 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev)
 	return r;
 }
 
+static int gfx_v9_0_kiq_kcq_disable(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
+	uint32_t scratch, tmp = 0;
+	int r, i;
+
+	r = amdgpu_gfx_scratch_get(adev, &scratch);
+	if (r) {
+		DRM_ERROR("Failed to get scratch reg (%d).\n", r);
+		return r;
+	}
+	WREG32(scratch, 0xCAFEDEAD);
+
+	r = amdgpu_ring_alloc(kiq_ring, 6 + 3);
+	if (r) {
+		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		amdgpu_gfx_scratch_free(adev, scratch);
+		return r;
+	}
+	/* unmap queues */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
+	amdgpu_ring_write(kiq_ring,
+			  PACKET3_UNMAP_QUEUES_ACTION(1)| /* RESET_QUEUES */
+			  PACKET3_UNMAP_QUEUES_QUEUE_SEL(2)); /* select all queues */
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	amdgpu_ring_write(kiq_ring, 0);
+	/* write to scratch for completion */
+	amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
+	amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
+	amdgpu_ring_write(kiq_ring, 0xDEADBEEF);
+	amdgpu_ring_commit(kiq_ring);
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		tmp = RREG32(scratch);
+		if (tmp == 0xDEADBEEF)
+			break;
+		DRM_UDELAY(1);
+	}
+	if (i >= adev->usec_timeout) {
+		DRM_ERROR("KCQ disable failed (scratch(0x%04X)=0x%08X)\n",
+			  scratch, tmp);
+		r = -EINVAL;
+	}
+	amdgpu_gfx_scratch_free(adev, scratch);
+
+	return r;
+}
+
 static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -2845,7 +2845,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring)
 	struct v9_mqd *mqd = ring->mqd_ptr;
 	int mqd_idx = ring - &adev->gfx.compute_ring[0];
 
-	if (!adev->gfx.in_reset) {
+	if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
 		memset((void *)mqd, 0, sizeof(*mqd));
 		mutex_lock(&adev->srbm_mutex);
 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
@@ -2938,10 +2938,7 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 	if (r)
 		return r;
 
-	if (amdgpu_sriov_vf(adev))
-		r = gfx_v9_0_kiq_resume(adev);
-	else
-		r = gfx_v9_0_cp_compute_resume(adev);
+	r = gfx_v9_0_kiq_resume(adev);
 	if (r)
 		return r;
 
@@ -2951,6 +2948,13 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 		ring->ready = false;
 		return r;
 	}
+
+	ring = &adev->gfx.kiq.ring;
+	ring->ready = true;
+	r = amdgpu_ring_test_ring(ring);
+	if (r)
+		ring->ready = false;
+
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
 
@@ -2960,14 +2964,6 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev)
 			ring->ready = false;
 	}
 
-	if (amdgpu_sriov_vf(adev)) {
-		ring = &adev->gfx.kiq.ring;
-		ring->ready = true;
-		r = amdgpu_ring_test_ring(ring);
-		if (r)
-			ring->ready = false;
-	}
-
 	gfx_v9_0_enable_gui_idle_interrupt(adev, true);
 
 	return 0;
@@ -3013,9 +3009,9 @@ static int gfx_v9_0_hw_fini(void *handle)
 		pr_debug("For SRIOV client, shouldn't do anything.\n");
 		return 0;
 	}
+	gfx_v9_0_kiq_kcq_disable(adev);
 	gfx_v9_0_cp_enable(adev, false);
 	gfx_v9_0_rlc_stop(adev);
-	gfx_v9_0_cp_compute_fini(adev);
 
 	return 0;
 }
@@ -3024,14 +3020,18 @@ static int gfx_v9_0_suspend(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->gfx.in_suspend = true;
 	return gfx_v9_0_hw_fini(adev);
 }
 
 static int gfx_v9_0_resume(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
 
-	return gfx_v9_0_hw_init(adev);
+	r = gfx_v9_0_hw_init(adev);
+	adev->gfx.in_suspend = false;
+	return r;
 }
 
 static bool gfx_v9_0_is_idle(void *handle)
@@ -4479,218 +4479,6 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 	return 0;
 }
 
-static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
-{
-	int r, j;
-	u32 tmp;
-	bool use_doorbell = true;
-	u64 hqd_gpu_addr;
-	u64 mqd_gpu_addr;
-	u64 eop_gpu_addr;
-	u64 wb_gpu_addr;
-	u32 *buf;
-	struct v9_mqd *mqd;
-	struct amdgpu_device *adev;
-
-	adev = ring->adev;
-	if (ring->mqd_obj == NULL) {
-		r = amdgpu_bo_create(adev,
-				     sizeof(struct v9_mqd),
-				     PAGE_SIZE,true,
-				     AMDGPU_GEM_DOMAIN_GTT, 0, NULL,
-				     NULL, &ring->mqd_obj);
-		if (r) {
-			dev_warn(adev->dev, "(%d) create MQD bo failed\n", r);
-			return r;
-		}
-	}
-
-	r = amdgpu_bo_reserve(ring->mqd_obj, false);
-	if (unlikely(r != 0)) {
-		gfx_v9_0_cp_compute_fini(adev);
-		return r;
-	}
-
-	r = amdgpu_bo_pin(ring->mqd_obj, AMDGPU_GEM_DOMAIN_GTT,
-			  &mqd_gpu_addr);
-	if (r) {
-		dev_warn(adev->dev, "(%d) pin MQD bo failed\n", r);
-		gfx_v9_0_cp_compute_fini(adev);
-		return r;
-	}
-	r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&buf);
-	if (r) {
-		dev_warn(adev->dev, "(%d) map MQD bo failed\n", r);
-		gfx_v9_0_cp_compute_fini(adev);
-		return r;
-	}
-
-	/* init the mqd struct */
-	memset(buf, 0, sizeof(struct v9_mqd));
-
-	mqd = (struct v9_mqd *)buf;
-	mqd->header = 0xC0310800;
-	mqd->compute_pipelinestat_enable = 0x00000001;
-	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
-	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
-	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
-	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
-	mqd->compute_misc_reserved = 0x00000003;
-	mutex_lock(&adev->srbm_mutex);
-	soc15_grbm_select(adev, ring->me,
-			  ring->pipe,
-			  ring->queue, 0);
-	/* disable wptr polling */
-	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-
-	/* write the EOP addr */
-	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
-	eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring->queue * MEC_HPD_SIZE);
-	eop_gpu_addr >>= 8;
-
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR, lower_32_bits(eop_gpu_addr));
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr));
-	mqd->cp_hqd_eop_base_addr_lo = lower_32_bits(eop_gpu_addr);
-	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_gpu_addr);
-
-	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
-			    (order_base_2(MEC_HPD_SIZE / 4) - 1));
-	WREG32_SOC15(GC, 0, mmCP_HQD_EOP_CONTROL, tmp);
-
-	/* enable doorbell? */
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-	if (use_doorbell)
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-	else
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 0);
-
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, tmp);
-	mqd->cp_hqd_pq_doorbell_control = tmp;
-
-	/* disable the queue if it's active */
-	ring->wptr = 0;
-	mqd->cp_hqd_dequeue_request = 0;
-	mqd->cp_hqd_pq_rptr = 0;
-	mqd->cp_hqd_pq_wptr_lo = 0;
-	mqd->cp_hqd_pq_wptr_hi = 0;
-	if (RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1) {
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 1);
-		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1))
-				break;
-			udelay(1);
-		}
-		WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
-		WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
-	}
-
-	/* set the pointer to the MQD */
-	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
-	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
-	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);
-
-	/* set MQD vmid to 0 */
-	tmp = RREG32_SOC15(GC, 0, mmCP_MQD_CONTROL);
-	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
-	WREG32_SOC15(GC, 0, mmCP_MQD_CONTROL, tmp);
-	mqd->cp_mqd_control = tmp;
-
-	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
-	hqd_gpu_addr = ring->gpu_addr >> 8;
-	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
-	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);
-
-	/* set up the HQD, this is similar to CP_RB0_CNTL */
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
-			    (order_base_2(ring->ring_size / 4) - 1));
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
-			    ((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
-#ifdef __BIG_ENDIAN
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
-#endif
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_CONTROL, tmp);
-	mqd->cp_hqd_pq_control = tmp;
-
-	/* set the wb address wether it's enabled or not */
-	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
-	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
-	mqd->cp_hqd_pq_rptr_report_addr_hi =
-		upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR,
-		     mqd->cp_hqd_pq_rptr_report_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
-		     mqd->cp_hqd_pq_rptr_report_addr_hi);
-
-	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
-	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
-	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
-	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR,
-		     mqd->cp_hqd_pq_wptr_poll_addr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI,
-		     mqd->cp_hqd_pq_wptr_poll_addr_hi);
-
-	/* enable the doorbell if requested */
-	if (use_doorbell) {
-		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER,
-			     (AMDGPU_DOORBELL64_KIQ * 2) << 2);
-		WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER,
-			     (AMDGPU_DOORBELL64_MEC_RING7 * 2) << 2);
-		tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-				    DOORBELL_OFFSET, ring->doorbell_index);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_SOURCE, 0);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0);
-		mqd->cp_hqd_pq_doorbell_control = tmp;
-
-	} else {
-		mqd->cp_hqd_pq_doorbell_control = 0;
-	}
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL,
-		     mqd->cp_hqd_pq_doorbell_control);
-
-	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_LO, mqd->cp_hqd_pq_wptr_lo);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PQ_WPTR_HI, mqd->cp_hqd_pq_wptr_hi);
-
-	/* set the vmid for the queue */
-	mqd->cp_hqd_vmid = 0;
-	WREG32_SOC15(GC, 0, mmCP_HQD_VMID, mqd->cp_hqd_vmid);
-
-	tmp = RREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
-	WREG32_SOC15(GC, 0, mmCP_HQD_PERSISTENT_STATE, tmp);
-	mqd->cp_hqd_persistent_state = tmp;
-
-	/* activate the queue */
-	mqd->cp_hqd_active = 1;
-	WREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
-
-	soc15_grbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-
-	amdgpu_bo_kunmap(ring->mqd_obj);
-	amdgpu_bo_unreserve(ring->mqd_obj);
-
-	if (use_doorbell)
-		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-
-	return 0;
-}
-
 const struct amdgpu_ip_block_version gfx_v9_0_ip_block =
 {
 	.type = AMD_IP_BLOCK_TYPE_GFX,