diff options
author | Rex Zhu <Rex.Zhu@amd.com> | 2017-06-26 02:39:36 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2017-06-29 12:43:43 -0400 |
commit | 6b0fa871a9a2d83dd869ca40a7fd65a935d3564c (patch) | |
tree | 62a6e23ef2af4c4bc7b821b3183dcf4e4402fe63 /drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |
parent | 12d016626f99f48edbf5b006625b4e8c0de1eec7 (diff) |
drm/amdgpu: fix vulkan test performance drop and hang on VI
caused by not programming dynamic_cu_mask_addr in the KIQ MQD.
v2: create struct vi_mqd_allocation in FB which will contain
1. PM4 MQD structure.
2. Write Pointer Poll Memory.
3. Read Pointer Report Memory.
4. Dynamic CU Mask.
5. Dynamic RB Mask.
Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 27 |
1 file changed, 17 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 142924212b43..9a268272d38e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |||
@@ -40,7 +40,6 @@ | |||
40 | 40 | ||
41 | #include "bif/bif_5_0_d.h" | 41 | #include "bif/bif_5_0_d.h" |
42 | #include "bif/bif_5_0_sh_mask.h" | 42 | #include "bif/bif_5_0_sh_mask.h" |
43 | |||
44 | #include "gca/gfx_8_0_d.h" | 43 | #include "gca/gfx_8_0_d.h" |
45 | #include "gca/gfx_8_0_enum.h" | 44 | #include "gca/gfx_8_0_enum.h" |
46 | #include "gca/gfx_8_0_sh_mask.h" | 45 | #include "gca/gfx_8_0_sh_mask.h" |
@@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle) | |||
2100 | return r; | 2099 | return r; |
2101 | 2100 | ||
2102 | /* create MQD for all compute queues as well as KIQ for SRIOV case */ | 2101 | /* create MQD for all compute queues as well as KIQ for SRIOV case */ |
2103 | r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd)); | 2102 | r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation)); |
2104 | if (r) | 2103 | if (r) |
2105 | return r; | 2104 | return r; |
2106 | 2105 | ||
@@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
4715 | uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; | 4714 | uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; |
4716 | uint32_t tmp; | 4715 | uint32_t tmp; |
4717 | 4716 | ||
4718 | /* init the mqd struct */ | ||
4719 | memset(mqd, 0, sizeof(struct vi_mqd)); | ||
4720 | |||
4721 | mqd->header = 0xC0310800; | 4717 | mqd->header = 0xC0310800; |
4722 | mqd->compute_pipelinestat_enable = 0x00000001; | 4718 | mqd->compute_pipelinestat_enable = 0x00000001; |
4723 | mqd->compute_static_thread_mgmt_se0 = 0xffffffff; | 4719 | mqd->compute_static_thread_mgmt_se0 = 0xffffffff; |
@@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring) | |||
4725 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; | 4721 | mqd->compute_static_thread_mgmt_se2 = 0xffffffff; |
4726 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; | 4722 | mqd->compute_static_thread_mgmt_se3 = 0xffffffff; |
4727 | mqd->compute_misc_reserved = 0x00000003; | 4723 | mqd->compute_misc_reserved = 0x00000003; |
4728 | 4724 | if (!(adev->flags & AMD_IS_APU)) { | |
4725 | mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr | ||
4726 | + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); | ||
4727 | mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr | ||
4728 | + offsetof(struct vi_mqd_allocation, dyamic_cu_mask)); | ||
4729 | } | ||
4729 | eop_base_addr = ring->eop_gpu_addr >> 8; | 4730 | eop_base_addr = ring->eop_gpu_addr >> 8; |
4730 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; | 4731 | mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; |
4731 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); | 4732 | mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); |
@@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
4900 | if (adev->gfx.in_reset) { /* for GPU_RESET case */ | 4901 | if (adev->gfx.in_reset) { /* for GPU_RESET case */ |
4901 | /* reset MQD to a clean status */ | 4902 | /* reset MQD to a clean status */ |
4902 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4903 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4903 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | 4904 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); |
4904 | 4905 | ||
4905 | /* reset ring buffer */ | 4906 | /* reset ring buffer */ |
4906 | ring->wptr = 0; | 4907 | ring->wptr = 0; |
@@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
4916 | vi_srbm_select(adev, 0, 0, 0, 0); | 4917 | vi_srbm_select(adev, 0, 0, 0, 0); |
4917 | mutex_unlock(&adev->srbm_mutex); | 4918 | mutex_unlock(&adev->srbm_mutex); |
4918 | } else { | 4919 | } else { |
4920 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); | ||
4921 | ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; | ||
4922 | ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; | ||
4919 | mutex_lock(&adev->srbm_mutex); | 4923 | mutex_lock(&adev->srbm_mutex); |
4920 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 4924 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
4921 | gfx_v8_0_mqd_init(ring); | 4925 | gfx_v8_0_mqd_init(ring); |
@@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring) | |||
4929 | mutex_unlock(&adev->srbm_mutex); | 4933 | mutex_unlock(&adev->srbm_mutex); |
4930 | 4934 | ||
4931 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4935 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4932 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | 4936 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); |
4933 | } | 4937 | } |
4934 | 4938 | ||
4935 | return r; | 4939 | return r; |
@@ -4947,6 +4951,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) | |||
4947 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; | 4951 | int mqd_idx = ring - &adev->gfx.compute_ring[0]; |
4948 | 4952 | ||
4949 | if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { | 4953 | if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { |
4954 | memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); | ||
4955 | ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF; | ||
4956 | ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF; | ||
4950 | mutex_lock(&adev->srbm_mutex); | 4957 | mutex_lock(&adev->srbm_mutex); |
4951 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); | 4958 | vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); |
4952 | gfx_v8_0_mqd_init(ring); | 4959 | gfx_v8_0_mqd_init(ring); |
@@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) | |||
4954 | mutex_unlock(&adev->srbm_mutex); | 4961 | mutex_unlock(&adev->srbm_mutex); |
4955 | 4962 | ||
4956 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4963 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4957 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); | 4964 | memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation)); |
4958 | } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ | 4965 | } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ |
4959 | /* reset MQD to a clean status */ | 4966 | /* reset MQD to a clean status */ |
4960 | if (adev->gfx.mec.mqd_backup[mqd_idx]) | 4967 | if (adev->gfx.mec.mqd_backup[mqd_idx]) |
4961 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); | 4968 | memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation)); |
4962 | /* reset ring buffer */ | 4969 | /* reset ring buffer */ |
4963 | ring->wptr = 0; | 4970 | ring->wptr = 0; |
4964 | amdgpu_ring_clear_ring(ring); | 4971 | amdgpu_ring_clear_ring(ring); |