aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm
diff options
context:
space:
mode:
authorXiangliang Yu <Xiangliang.Yu@amd.com>2016-12-23 02:00:01 -0500
committerAlex Deucher <alexander.deucher@amd.com>2017-01-27 11:13:03 -0500
commit4e638ae9c1e7a2b85155f2dd91c8105ce109ea7e (patch)
tree13a53a5ba7500024bce122f4ad102aec3151b9ca /drivers/gpu/drm
parentb64a18c502fedab9e7b8b1b557909994637972c5 (diff)
drm/amdgpu/gfx8: add support for the kernel interface queue (KIQ)
KIQ is queue-memory based initialization method: setup KIQ queue firstly, then send command to KIQ to setup other queues, without accessing registers. For virtualization, need KIQ to access virtual function registers when running on guest mode. V2: use amdgpu_bo_create/free_kernel to allocate BO. Signed-off-by: Monk Liu <Monk.Liu@amd.com> Signed-off-by: Xiangliang Yu <Xiangliang.Yu@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Christian König <christian.koenig@amd.com> Reviewed-by: Chunming Zhou <david1.zhou@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c607
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h2
3 files changed, 620 insertions, 2 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e8fbd7a791d3..b5ad548e2503 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -183,6 +183,11 @@ enum amdgpu_thermal_irq {
183 AMDGPU_THERMAL_IRQ_LAST 183 AMDGPU_THERMAL_IRQ_LAST
184}; 184};
185 185
/* Interrupt types exposed by the kernel interface queue (KIQ).
 * DRIVER0 is routed through the CPC GENERIC2 interrupt (see
 * gfx_v8_0_kiq_set_interrupt_state).
 */
enum amdgpu_kiq_irq {
	AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,	/* fence/doorbell interrupt for the driver */
	AMDGPU_CP_KIQ_IRQ_LAST		/* number of KIQ interrupt types */
};
190
186int amdgpu_set_clockgating_state(struct amdgpu_device *adev, 191int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
187 enum amd_ip_block_type block_type, 192 enum amd_ip_block_type block_type,
188 enum amd_clockgating_state state); 193 enum amd_clockgating_state state);
@@ -775,6 +780,13 @@ struct amdgpu_mec {
775 u32 num_queue; 780 u32 num_queue;
776}; 781};
777 782
/* Kernel interface queue (KIQ): a CP compute queue used to set up the
 * other queues via packets instead of direct register writes; per the
 * commit log this is required when running as an SR-IOV guest.
 */
struct amdgpu_kiq {
	u64			eop_gpu_addr;	/* GPU address of the KIQ EOP buffer */
	struct amdgpu_bo	*eop_obj;	/* BO backing the EOP buffer */
	struct amdgpu_ring	ring;		/* the KIQ ring itself */
	struct amdgpu_irq_src	irq;		/* KIQ interrupt source (GENERIC2) */
};
789
778/* 790/*
779 * GPU scratch registers structures, functions & helpers 791 * GPU scratch registers structures, functions & helpers
780 */ 792 */
@@ -850,6 +862,7 @@ struct amdgpu_gfx {
850 struct amdgpu_gca_config config; 862 struct amdgpu_gca_config config;
851 struct amdgpu_rlc rlc; 863 struct amdgpu_rlc rlc;
852 struct amdgpu_mec mec; 864 struct amdgpu_mec mec;
865 struct amdgpu_kiq kiq;
853 struct amdgpu_scratch scratch; 866 struct amdgpu_scratch scratch;
854 const struct firmware *me_fw; /* ME firmware */ 867 const struct firmware *me_fw; /* ME firmware */
855 uint32_t me_fw_version; 868 uint32_t me_fw_version;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 71ab1eb47909..d604ba37541f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -1367,6 +1367,42 @@ static void gfx_v8_0_mec_fini(struct amdgpu_device *adev)
1367 } 1367 }
1368} 1368}
1369 1369
1370static int gfx_v8_0_kiq_init_ring(struct amdgpu_device *adev,
1371 struct amdgpu_ring *ring,
1372 struct amdgpu_irq_src *irq)
1373{
1374 int r = 0;
1375
1376 ring->adev = NULL;
1377 ring->ring_obj = NULL;
1378 ring->use_doorbell = true;
1379 ring->doorbell_index = AMDGPU_DOORBELL_KIQ;
1380 if (adev->gfx.mec2_fw) {
1381 ring->me = 2;
1382 ring->pipe = 0;
1383 } else {
1384 ring->me = 1;
1385 ring->pipe = 1;
1386 }
1387
1388 irq->data = ring;
1389 ring->queue = 0;
1390 sprintf(ring->name, "kiq %d.%d.%d", ring->me, ring->pipe, ring->queue);
1391 r = amdgpu_ring_init(adev, ring, 1024,
1392 irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
1393 if (r)
1394 dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
1395
1396 return r;
1397}
1398
/* Tear down the KIQ ring and detach it from its interrupt source.
 * Counterpart of gfx_v8_0_kiq_init_ring; the ring is freed before the
 * irq back-pointer is cleared so the handler never sees a stale ring.
 */
static void gfx_v8_0_kiq_free_ring(struct amdgpu_ring *ring,
				   struct amdgpu_irq_src *irq)
{
	amdgpu_ring_fini(ring);
	irq->data = NULL;
}
1405
1370#define MEC_HPD_SIZE 2048 1406#define MEC_HPD_SIZE 2048
1371 1407
1372static int gfx_v8_0_mec_init(struct amdgpu_device *adev) 1408static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
@@ -1421,6 +1457,35 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev)
1421 return 0; 1457 return 0;
1422} 1458}
1423 1459
1460static void gfx_v8_0_kiq_fini(struct amdgpu_device *adev)
1461{
1462 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1463
1464 amdgpu_bo_free_kernel(&kiq->eop_obj, &kiq->eop_gpu_addr, NULL);
1465 kiq->eop_obj = NULL;
1466}
1467
1468static int gfx_v8_0_kiq_init(struct amdgpu_device *adev)
1469{
1470 int r;
1471 u32 *hpd;
1472 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
1473
1474 r = amdgpu_bo_create_kernel(adev, MEC_HPD_SIZE, PAGE_SIZE,
1475 AMDGPU_GEM_DOMAIN_GTT, &kiq->eop_obj,
1476 &kiq->eop_gpu_addr, (void **)&hpd);
1477 if (r) {
1478 dev_warn(adev->dev, "failed to create KIQ bo (%d).\n", r);
1479 return r;
1480 }
1481
1482 memset(hpd, 0, MEC_HPD_SIZE);
1483
1484 amdgpu_bo_kunmap(kiq->eop_obj);
1485
1486 return 0;
1487}
1488
1424static const u32 vgpr_init_compute_shader[] = 1489static const u32 vgpr_init_compute_shader[] =
1425{ 1490{
1426 0x7e000209, 0x7e020208, 1491 0x7e000209, 0x7e020208,
@@ -1997,8 +2062,14 @@ static int gfx_v8_0_sw_init(void *handle)
1997{ 2062{
1998 int i, r; 2063 int i, r;
1999 struct amdgpu_ring *ring; 2064 struct amdgpu_ring *ring;
2065 struct amdgpu_kiq *kiq;
2000 struct amdgpu_device *adev = (struct amdgpu_device *)handle; 2066 struct amdgpu_device *adev = (struct amdgpu_device *)handle;
2001 2067
2068 /* KIQ event */
2069 r = amdgpu_irq_add_id(adev, 178, &adev->gfx.kiq.irq);
2070 if (r)
2071 return r;
2072
2002 /* EOP Event */ 2073 /* EOP Event */
2003 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq); 2074 r = amdgpu_irq_add_id(adev, 181, &adev->gfx.eop_irq);
2004 if (r) 2075 if (r)
@@ -2036,6 +2107,17 @@ static int gfx_v8_0_sw_init(void *handle)
2036 return r; 2107 return r;
2037 } 2108 }
2038 2109
2110 r = gfx_v8_0_kiq_init(adev);
2111 if (r) {
2112 DRM_ERROR("Failed to init KIQ BOs!\n");
2113 return r;
2114 }
2115
2116 kiq = &adev->gfx.kiq;
2117 r = gfx_v8_0_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
2118 if (r)
2119 return r;
2120
2039 /* set up the gfx ring */ 2121 /* set up the gfx ring */
2040 for (i = 0; i < adev->gfx.num_gfx_rings; i++) { 2122 for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
2041 ring = &adev->gfx.gfx_ring[i]; 2123 ring = &adev->gfx.gfx_ring[i];
@@ -2119,7 +2201,9 @@ static int gfx_v8_0_sw_fini(void *handle)
2119 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); 2201 amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
2120 for (i = 0; i < adev->gfx.num_compute_rings; i++) 2202 for (i = 0; i < adev->gfx.num_compute_rings; i++)
2121 amdgpu_ring_fini(&adev->gfx.compute_ring[i]); 2203 amdgpu_ring_fini(&adev->gfx.compute_ring[i]);
2204 gfx_v8_0_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
2122 2205
2206 gfx_v8_0_kiq_fini(adev);
2123 gfx_v8_0_mec_fini(adev); 2207 gfx_v8_0_mec_fini(adev);
2124 gfx_v8_0_rlc_fini(adev); 2208 gfx_v8_0_rlc_fini(adev);
2125 gfx_v8_0_free_microcode(adev); 2209 gfx_v8_0_free_microcode(adev);
@@ -4495,6 +4579,393 @@ static void gfx_v8_0_cp_compute_fini(struct amdgpu_device *adev)
4495 } 4579 }
4496} 4580}
4497 4581
/* KIQ functions */

/* Tell the RLC which me/pipe/queue is the KIQ by programming the low
 * byte of RLC_CP_SCHEDULERS, then set bit 7 in a second write.
 * NOTE(review): the two-step write (queue id first, then id | 0x80)
 * looks deliberate — presumably bit 7 is a "KIQ active" flag that must
 * be raised only after the id is latched; confirm against RLC docs.
 */
static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring)
{
	uint32_t tmp;
	struct amdgpu_device *adev = ring->adev;

	/* tell RLC which is KIQ queue */
	tmp = RREG32(mmRLC_CP_SCHEDULERS);
	tmp &= 0xffffff00;	/* clear the low byte holding the queue id */
	tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue);
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
	tmp |= 0x80;
	WREG32(mmRLC_CP_SCHEDULERS, tmp);
}
4596
4597static void gfx_v8_0_kiq_enable(struct amdgpu_ring *ring)
4598{
4599 amdgpu_ring_alloc(ring, 8);
4600 /* set resources */
4601 amdgpu_ring_write(ring, PACKET3(PACKET3_SET_RESOURCES, 6));
4602 amdgpu_ring_write(ring, 0); /* vmid_mask:0 queue_type:0 (KIQ) */
4603 amdgpu_ring_write(ring, 0x000000FF); /* queue mask lo */
4604 amdgpu_ring_write(ring, 0); /* queue mask hi */
4605 amdgpu_ring_write(ring, 0); /* gws mask lo */
4606 amdgpu_ring_write(ring, 0); /* gws mask hi */
4607 amdgpu_ring_write(ring, 0); /* oac mask */
4608 amdgpu_ring_write(ring, 0); /* gds heap base:0, gds heap size:0 */
4609 amdgpu_ring_commit(ring);
4610 udelay(50);
4611}
4612
4613static void gfx_v8_0_map_queue_enable(struct amdgpu_ring *kiq_ring,
4614 struct amdgpu_ring *ring)
4615{
4616 struct amdgpu_device *adev = kiq_ring->adev;
4617 uint64_t mqd_addr, wptr_addr;
4618
4619 mqd_addr = amdgpu_bo_gpu_offset(ring->mqd_obj);
4620 wptr_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
4621 amdgpu_ring_alloc(kiq_ring, 8);
4622
4623 amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
4624 /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
4625 amdgpu_ring_write(kiq_ring, 0x21010000);
4626 amdgpu_ring_write(kiq_ring, (ring->doorbell_index << 2) |
4627 (ring->queue << 26) |
4628 (ring->pipe << 29) |
4629 ((ring->me == 1 ? 0 : 1) << 31)); /* doorbell */
4630 amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
4631 amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
4632 amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
4633 amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
4634 amdgpu_ring_commit(kiq_ring);
4635 udelay(50);
4636}
4637
/*
 * gfx_v8_0_mqd_init - populate a VI memory queue descriptor (MQD)
 * @adev:         amdgpu device pointer
 * @mqd:          CPU pointer to the MQD to fill
 * @mqd_gpu_addr: GPU address of the MQD BO
 * @eop_gpu_addr: GPU address of the queue's EOP buffer
 * @ring:         the ring this MQD describes
 *
 * Fills the MQD only; no HQD registers are written here (the KIQ path
 * does that separately in gfx_v8_0_kiq_init_register). The RREG32 reads
 * below seed the register-image fields from current hardware state.
 * Always returns 0.
 */
static int gfx_v8_0_mqd_init(struct amdgpu_device *adev,
			     struct vi_mqd *mqd,
			     uint64_t mqd_gpu_addr,
			     uint64_t eop_gpu_addr,
			     struct amdgpu_ring *ring)
{
	uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
	uint32_t tmp;

	/* fixed MQD header and static thread management (all SEs enabled) */
	mqd->header = 0xC0310800;
	mqd->compute_pipelinestat_enable = 0x00000001;
	mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se1 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
	mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
	mqd->compute_misc_reserved = 0x00000003;

	/* EOP base address is stored as a 256-byte-aligned address (>> 8) */
	eop_base_addr = eop_gpu_addr >> 8;
	mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
	mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(mmCP_HQD_EOP_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE,
			(order_base_2(MEC_HPD_SIZE / 4) - 1));

	mqd->cp_hqd_eop_control = tmp;

	/* enable doorbell? */
	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);

	if (ring->use_doorbell)
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 1);
	else
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				    DOORBELL_EN, 0);

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* disable the queue if it's active */
	mqd->cp_hqd_dequeue_request = 0;
	mqd->cp_hqd_pq_rptr = 0;
	mqd->cp_hqd_pq_wptr = 0;

	/* set the pointer to the MQD */
	mqd->cp_mqd_base_addr_lo = mqd_gpu_addr & 0xfffffffc;
	mqd->cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);

	/* set MQD vmid to 0 */
	tmp = RREG32(mmCP_MQD_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0);
	mqd->cp_mqd_control = tmp;

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	hqd_gpu_addr = ring->gpu_addr >> 8;
	mqd->cp_hqd_pq_base_lo = hqd_gpu_addr;
	mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	tmp = RREG32(mmCP_HQD_PQ_CONTROL);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE,
			    (order_base_2(ring->ring_size / 4) - 1));
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE,
			((order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1) << 8));
#ifdef __BIG_ENDIAN
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ENDIAN_SWAP, 1);
#endif
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, ROQ_PQ_IB_FLIP, 0);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
	mqd->cp_hqd_pq_control = tmp;

	/* set the wb address whether it's enabled or not */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->rptr_offs * 4);
	mqd->cp_hqd_pq_rptr_report_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_rptr_report_addr_hi =
		upper_32_bits(wb_gpu_addr) & 0xffff;

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	wb_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4);
	mqd->cp_hqd_pq_wptr_poll_addr_lo = wb_gpu_addr & 0xfffffffc;
	mqd->cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;

	tmp = 0;
	/* enable the doorbell if requested */
	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
				DOORBELL_OFFSET, ring->doorbell_index);

		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_EN, 1);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_SOURCE, 0);
		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
					 DOORBELL_HIT, 0);
	}

	mqd->cp_hqd_pq_doorbell_control = tmp;

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	ring->wptr = 0;
	mqd->cp_hqd_pq_wptr = ring->wptr;
	mqd->cp_hqd_pq_rptr = RREG32(mmCP_HQD_PQ_RPTR);

	/* set the vmid for the queue */
	mqd->cp_hqd_vmid = 0;

	tmp = RREG32(mmCP_HQD_PERSISTENT_STATE);
	tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x53);
	mqd->cp_hqd_persistent_state = tmp;

	/* activate the queue */
	mqd->cp_hqd_active = 1;

	return 0;
}
4757
/*
 * gfx_v8_0_kiq_init_register - program the HQD registers from an MQD
 * @adev: amdgpu device pointer
 * @mqd:  the MQD whose register image is written out
 * @ring: the ring being activated (the KIQ in this driver)
 *
 * Must be called with the target me/pipe/queue selected via
 * vi_srbm_select() and srbm_mutex held (see gfx_v8_0_kiq_init_queue).
 * The write order follows the hardware bring-up sequence: quiesce the
 * queue first, then addresses/control, doorbell, and activation last.
 * Always returns 0.
 */
static int gfx_v8_0_kiq_init_register(struct amdgpu_device *adev,
				      struct vi_mqd *mqd,
				      struct amdgpu_ring *ring)
{
	uint32_t tmp;
	int j;

	/* disable wptr polling */
	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);

	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	WREG32(mmCP_HQD_EOP_CONTROL, mqd->cp_hqd_eop_control);

	/* enable doorbell? */
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* disable the queue if it's active: request a dequeue and poll
	 * CP_HQD_ACTIVE (bounded by adev->usec_timeout) until it drops */
	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < adev->usec_timeout; j++) {
			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		WREG32(mmCP_HQD_DEQUEUE_REQUEST, mqd->cp_hqd_dequeue_request);
		WREG32(mmCP_HQD_PQ_RPTR, mqd->cp_hqd_pq_rptr);
		WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);
	}

	/* set the pointer to the MQD */
	WREG32(mmCP_MQD_BASE_ADDR, mqd->cp_mqd_base_addr_lo);
	WREG32(mmCP_MQD_BASE_ADDR_HI, mqd->cp_mqd_base_addr_hi);

	/* set MQD vmid to 0 */
	WREG32(mmCP_MQD_CONTROL, mqd->cp_mqd_control);

	/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
	WREG32(mmCP_HQD_PQ_BASE, mqd->cp_hqd_pq_base_lo);
	WREG32(mmCP_HQD_PQ_BASE_HI, mqd->cp_hqd_pq_base_hi);

	/* set up the HQD, this is similar to CP_RB0_CNTL */
	WREG32(mmCP_HQD_PQ_CONTROL, mqd->cp_hqd_pq_control);

	/* set the wb address whether it's enabled or not */
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR,
				mqd->cp_hqd_pq_rptr_report_addr_lo);
	WREG32(mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI,
				mqd->cp_hqd_pq_rptr_report_addr_hi);

	/* only used if CP_PQ_WPTR_POLL_CNTL.CP_PQ_WPTR_POLL_CNTL__EN_MASK=1 */
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR, mqd->cp_hqd_pq_wptr_poll_addr_lo);
	WREG32(mmCP_HQD_PQ_WPTR_POLL_ADDR_HI, mqd->cp_hqd_pq_wptr_poll_addr_hi);

	/* enable the doorbell if requested; on these ASICs also program
	 * the MEC doorbell aperture to span KIQ..MEC_RING7 */
	if (ring->use_doorbell) {
		if ((adev->asic_type == CHIP_CARRIZO) ||
		    (adev->asic_type == CHIP_FIJI) ||
		    (adev->asic_type == CHIP_STONEY)) {
			WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER,
			       AMDGPU_DOORBELL_KIQ << 2);
			WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER,
			       AMDGPU_DOORBELL_MEC_RING7 << 2);
		}
	}
	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);

	/* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */
	WREG32(mmCP_HQD_PQ_WPTR, mqd->cp_hqd_pq_wptr);

	/* set the vmid for the queue */
	WREG32(mmCP_HQD_VMID, mqd->cp_hqd_vmid);

	WREG32(mmCP_HQD_PERSISTENT_STATE, mqd->cp_hqd_persistent_state);

	/* activate the queue */
	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);

	if (ring->use_doorbell) {
		tmp = RREG32(mmCP_PQ_STATUS);
		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
		WREG32(mmCP_PQ_STATUS, tmp);
	}

	return 0;
}
4848
4849static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring,
4850 struct vi_mqd *mqd,
4851 u64 mqd_gpu_addr)
4852{
4853 struct amdgpu_device *adev = ring->adev;
4854 struct amdgpu_kiq *kiq = &adev->gfx.kiq;
4855 uint64_t eop_gpu_addr;
4856 bool is_kiq = false;
4857
4858 if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
4859 is_kiq = true;
4860
4861 if (is_kiq) {
4862 eop_gpu_addr = kiq->eop_gpu_addr;
4863 gfx_v8_0_kiq_setting(&kiq->ring);
4864 } else
4865 eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr +
4866 ring->queue * MEC_HPD_SIZE;
4867
4868 mutex_lock(&adev->srbm_mutex);
4869 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4870
4871 gfx_v8_0_mqd_init(adev, mqd, mqd_gpu_addr, eop_gpu_addr, ring);
4872
4873 if (is_kiq)
4874 gfx_v8_0_kiq_init_register(adev, mqd, ring);
4875
4876 vi_srbm_select(adev, 0, 0, 0, 0);
4877 mutex_unlock(&adev->srbm_mutex);
4878
4879 if (is_kiq)
4880 gfx_v8_0_kiq_enable(ring);
4881 else
4882 gfx_v8_0_map_queue_enable(&kiq->ring, ring);
4883
4884 return 0;
4885}
4886
4887static void gfx_v8_0_kiq_free_queue(struct amdgpu_device *adev)
4888{
4889 struct amdgpu_ring *ring = NULL;
4890 int i;
4891
4892 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4893 ring = &adev->gfx.compute_ring[i];
4894 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4895 ring->mqd_obj = NULL;
4896 }
4897
4898 ring = &adev->gfx.kiq.ring;
4899 amdgpu_bo_free_kernel(&ring->mqd_obj, NULL, NULL);
4900 ring->mqd_obj = NULL;
4901}
4902
4903static int gfx_v8_0_kiq_setup_queue(struct amdgpu_device *adev,
4904 struct amdgpu_ring *ring)
4905{
4906 struct vi_mqd *mqd;
4907 u64 mqd_gpu_addr;
4908 u32 *buf;
4909 int r = 0;
4910
4911 r = amdgpu_bo_create_kernel(adev, sizeof(struct vi_mqd), PAGE_SIZE,
4912 AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
4913 &mqd_gpu_addr, (void **)&buf);
4914 if (r) {
4915 dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
4916 return r;
4917 }
4918
4919 /* init the mqd struct */
4920 memset(buf, 0, sizeof(struct vi_mqd));
4921 mqd = (struct vi_mqd *)buf;
4922
4923 r = gfx_v8_0_kiq_init_queue(ring, mqd, mqd_gpu_addr);
4924 if (r)
4925 return r;
4926
4927 amdgpu_bo_kunmap(ring->mqd_obj);
4928
4929 return 0;
4930}
4931
4932static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev)
4933{
4934 struct amdgpu_ring *ring = NULL;
4935 int r, i;
4936
4937 ring = &adev->gfx.kiq.ring;
4938 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4939 if (r)
4940 return r;
4941
4942 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4943 ring = &adev->gfx.compute_ring[i];
4944 r = gfx_v8_0_kiq_setup_queue(adev, ring);
4945 if (r)
4946 return r;
4947 }
4948
4949 gfx_v8_0_cp_compute_enable(adev, true);
4950
4951 for (i = 0; i < adev->gfx.num_compute_rings; i++) {
4952 ring = &adev->gfx.compute_ring[i];
4953
4954 ring->ready = true;
4955 r = amdgpu_ring_test_ring(ring);
4956 if (r)
4957 ring->ready = false;
4958 }
4959
4960 ring = &adev->gfx.kiq.ring;
4961 ring->ready = true;
4962 r = amdgpu_ring_test_ring(ring);
4963 if (r)
4964 ring->ready = false;
4965
4966 return 0;
4967}
4968
4498static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev) 4969static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
4499{ 4970{
4500 int r, i, j; 4971 int r, i, j;
@@ -4795,7 +5266,10 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
4795 if (r) 5266 if (r)
4796 return r; 5267 return r;
4797 5268
4798 r = gfx_v8_0_cp_compute_resume(adev); 5269 if (amdgpu_sriov_vf(adev))
5270 r = gfx_v8_0_kiq_resume(adev);
5271 else
5272 r = gfx_v8_0_cp_compute_resume(adev);
4799 if (r) 5273 if (r)
4800 return r; 5274 return r;
4801 5275
@@ -4834,6 +5308,7 @@ static int gfx_v8_0_hw_fini(void *handle)
4834 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); 5308 amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
4835 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); 5309 amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
4836 if (amdgpu_sriov_vf(adev)) { 5310 if (amdgpu_sriov_vf(adev)) {
5311 gfx_v8_0_kiq_free_queue(adev);
4837 pr_debug("For SRIOV client, shouldn't do anything.\n"); 5312 pr_debug("For SRIOV client, shouldn't do anything.\n");
4838 return 0; 5313 return 0;
4839 } 5314 }
@@ -5930,7 +6405,8 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
5930{ 6405{
5931 u32 ref_and_mask, reg_mem_engine; 6406 u32 ref_and_mask, reg_mem_engine;
5932 6407
5933 if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { 6408 if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) ||
6409 (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)) {
5934 switch (ring->me) { 6410 switch (ring->me) {
5935 case 1: 6411 case 1:
5936 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe; 6412 ref_and_mask = GPU_HDP_FLUSH_DONE__CP2_MASK << ring->pipe;
@@ -6143,6 +6619,32 @@ static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
6143 amdgpu_ring_write(ring, upper_32_bits(seq)); 6619 amdgpu_ring_write(ring, upper_32_bits(seq));
6144} 6620}
6145 6621
/*
 * gfx_v8_0_ring_emit_fence_kiq - emit a fence on the KIQ ring
 * @ring:  the KIQ ring
 * @addr:  writeback address for the fence sequence number
 * @seq:   fence sequence number to write
 * @flags: AMDGPU_FENCE_FLAG_* bits
 *
 * Writes the 32-bit seq to @addr via WRITE_DATA, then optionally pokes
 * CPC_INT_STATUS to raise the GENERIC2 interrupt handled by
 * gfx_v8_0_kiq_irq. 64-bit fences are not supported on this path.
 */
static void gfx_v8_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
					 u64 seq, unsigned int flags)
{
	/* we only allocate 32bit for each seq wb address */
	if (flags & AMDGPU_FENCE_FLAG_64BIT)
		BUG();

	/* write fence seq to the "addr" */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
	amdgpu_ring_write(ring, lower_32_bits(addr));
	amdgpu_ring_write(ring, upper_32_bits(addr));
	amdgpu_ring_write(ring, lower_32_bits(seq));

	if (flags & AMDGPU_FENCE_FLAG_INT) {
		/* set register to trigger INT */
		amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
		amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
					 WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
		amdgpu_ring_write(ring, mmCPC_INT_STATUS);
		amdgpu_ring_write(ring, 0);
		amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
	}
}
6647
6146static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) 6648static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
6147{ 6649{
6148 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0)); 6650 amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
@@ -6324,6 +6826,72 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev,
6324 return 0; 6826 return 0;
6325} 6827}
6326 6828
6829static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
6830 struct amdgpu_irq_src *src,
6831 unsigned int type,
6832 enum amdgpu_interrupt_state state)
6833{
6834 uint32_t tmp, target;
6835 struct amdgpu_ring *ring = (struct amdgpu_ring *)src->data;
6836
6837 BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6838
6839 if (ring->me == 1)
6840 target = mmCP_ME1_PIPE0_INT_CNTL;
6841 else
6842 target = mmCP_ME2_PIPE0_INT_CNTL;
6843 target += ring->pipe;
6844
6845 switch (type) {
6846 case AMDGPU_CP_KIQ_IRQ_DRIVER0:
6847 if (state == AMDGPU_IRQ_STATE_DISABLE) {
6848 tmp = RREG32(mmCPC_INT_CNTL);
6849 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6850 GENERIC2_INT_ENABLE, 0);
6851 WREG32(mmCPC_INT_CNTL, tmp);
6852
6853 tmp = RREG32(target);
6854 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6855 GENERIC2_INT_ENABLE, 0);
6856 WREG32(target, tmp);
6857 } else {
6858 tmp = RREG32(mmCPC_INT_CNTL);
6859 tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
6860 GENERIC2_INT_ENABLE, 1);
6861 WREG32(mmCPC_INT_CNTL, tmp);
6862
6863 tmp = RREG32(target);
6864 tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
6865 GENERIC2_INT_ENABLE, 1);
6866 WREG32(target, tmp);
6867 }
6868 break;
6869 default:
6870 BUG(); /* kiq only support GENERIC2_INT now */
6871 break;
6872 }
6873 return 0;
6874}
6875
6876static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev,
6877 struct amdgpu_irq_src *source,
6878 struct amdgpu_iv_entry *entry)
6879{
6880 u8 me_id, pipe_id, queue_id;
6881 struct amdgpu_ring *ring = (struct amdgpu_ring *)source->data;
6882
6883 BUG_ON(!ring || (ring->funcs->type != AMDGPU_RING_TYPE_KIQ));
6884
6885 me_id = (entry->ring_id & 0x0c) >> 2;
6886 pipe_id = (entry->ring_id & 0x03) >> 0;
6887 queue_id = (entry->ring_id & 0x70) >> 4;
6888 DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n",
6889 me_id, pipe_id, queue_id);
6890
6891 amdgpu_fence_process(ring);
6892 return 0;
6893}
6894
6327static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { 6895static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
6328 .name = "gfx_v8_0", 6896 .name = "gfx_v8_0",
6329 .early_init = gfx_v8_0_early_init, 6897 .early_init = gfx_v8_0_early_init,
@@ -6404,10 +6972,37 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
6404 .pad_ib = amdgpu_ring_generic_pad_ib, 6972 .pad_ib = amdgpu_ring_generic_pad_ib,
6405}; 6973};
6406 6974
/* Ring function table for the KIQ. Mostly reuses the compute-ring
 * helpers; the fence emission differs (32-bit seq + CPC_INT_STATUS
 * poke, see gfx_v8_0_ring_emit_fence_kiq).
 */
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
	.type = AMDGPU_RING_TYPE_KIQ,
	.align_mask = 0xff,
	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
	.get_rptr = gfx_v8_0_ring_get_rptr,
	.get_wptr = gfx_v8_0_ring_get_wptr_compute,
	.set_wptr = gfx_v8_0_ring_set_wptr_compute,
	.emit_frame_size =
		20 + /* gfx_v8_0_ring_emit_gds_switch */
		7 + /* gfx_v8_0_ring_emit_hdp_flush */
		5 + /* gfx_v8_0_ring_emit_hdp_invalidate */
		7 + /* gfx_v8_0_ring_emit_pipeline_sync */
		17 + /* gfx_v8_0_ring_emit_vm_flush */
		7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
	.test_ring = gfx_v8_0_ring_test_ring,
	.test_ib = gfx_v8_0_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.pad_ib = amdgpu_ring_generic_pad_ib,
};
6999
6407static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) 7000static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
6408{ 7001{
6409 int i; 7002 int i;
6410 7003
7004 adev->gfx.kiq.ring.funcs = &gfx_v8_0_ring_funcs_kiq;
7005
6411 for (i = 0; i < adev->gfx.num_gfx_rings; i++) 7006 for (i = 0; i < adev->gfx.num_gfx_rings; i++)
6412 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx; 7007 adev->gfx.gfx_ring[i].funcs = &gfx_v8_0_ring_funcs_gfx;
6413 7008
@@ -6430,6 +7025,11 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = {
6430 .process = gfx_v8_0_priv_inst_irq, 7025 .process = gfx_v8_0_priv_inst_irq,
6431}; 7026};
6432 7027
7028static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = {
7029 .set = gfx_v8_0_kiq_set_interrupt_state,
7030 .process = gfx_v8_0_kiq_irq,
7031};
7032
6433static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) 7033static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6434{ 7034{
6435 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST; 7035 adev->gfx.eop_irq.num_types = AMDGPU_CP_IRQ_LAST;
@@ -6440,6 +7040,9 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev)
6440 7040
6441 adev->gfx.priv_inst_irq.num_types = 1; 7041 adev->gfx.priv_inst_irq.num_types = 1;
6442 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; 7042 adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs;
7043
7044 adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST;
7045 adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs;
6443} 7046}
6444 7047
6445static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev) 7048static void gfx_v8_0_set_rlc_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index 11746f22d0c5..7a3863a45f0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -360,6 +360,8 @@
360#define PACKET3_WAIT_ON_CE_COUNTER 0x86 360#define PACKET3_WAIT_ON_CE_COUNTER 0x86
361#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88 361#define PACKET3_WAIT_ON_DE_COUNTER_DIFF 0x88
362#define PACKET3_SWITCH_BUFFER 0x8B 362#define PACKET3_SWITCH_BUFFER 0x8B
363#define PACKET3_SET_RESOURCES 0xA0
364#define PACKET3_MAP_QUEUES 0xA2
363 365
364#define VCE_CMD_NO_OP 0x00000000 366#define VCE_CMD_NO_OP 0x00000000
365#define VCE_CMD_END 0x00000001 367#define VCE_CMD_END 0x00000001