about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Rex Zhu <Rex.Zhu@amd.com> 2017-06-26 02:39:36 -0400
committer: Alex Deucher <alexander.deucher@amd.com> 2017-06-29 12:43:43 -0400
commit: 6b0fa871a9a2d83dd869ca40a7fd65a935d3564c (patch)
tree: 62a6e23ef2af4c4bc7b821b3183dcf4e4402fe63
parent: 12d016626f99f48edbf5b006625b4e8c0de1eec7 (diff)
drm/amdgpu: fix vulkan test performance drop and hang on VI
The performance drop and hang were caused by not programming dynamic_cu_mask_addr in the KIQ MQD.

v2: create struct vi_mqd_allocation in FB, which will contain:
1. PM4 MQD structure.
2. Write Pointer Poll Memory.
3. Read Pointer Report Memory.
4. Dynamic CU Mask.
5. Dynamic RB Mask.

Signed-off-by: Rex Zhu <Rex.Zhu@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c 27
-rw-r--r-- drivers/gpu/drm/amd/include/vi_structs.h 268
2 files changed, 285 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 142924212b43..9a268272d38e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -40,7 +40,6 @@
40 40
41#include "bif/bif_5_0_d.h" 41#include "bif/bif_5_0_d.h"
42#include "bif/bif_5_0_sh_mask.h" 42#include "bif/bif_5_0_sh_mask.h"
43
44#include "gca/gfx_8_0_d.h" 43#include "gca/gfx_8_0_d.h"
45#include "gca/gfx_8_0_enum.h" 44#include "gca/gfx_8_0_enum.h"
46#include "gca/gfx_8_0_sh_mask.h" 45#include "gca/gfx_8_0_sh_mask.h"
@@ -2100,7 +2099,7 @@ static int gfx_v8_0_sw_init(void *handle)
2100 return r; 2099 return r;
2101 2100
2102 /* create MQD for all compute queues as well as KIQ for SRIOV case */ 2101 /* create MQD for all compute queues as well as KIQ for SRIOV case */
2103 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd)); 2102 r = amdgpu_gfx_compute_mqd_sw_init(adev, sizeof(struct vi_mqd_allocation));
2104 if (r) 2103 if (r)
2105 return r; 2104 return r;
2106 2105
@@ -4715,9 +4714,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4715 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr; 4714 uint64_t hqd_gpu_addr, wb_gpu_addr, eop_base_addr;
4716 uint32_t tmp; 4715 uint32_t tmp;
4717 4716
4718 /* init the mqd struct */
4719 memset(mqd, 0, sizeof(struct vi_mqd));
4720
4721 mqd->header = 0xC0310800; 4717 mqd->header = 0xC0310800;
4722 mqd->compute_pipelinestat_enable = 0x00000001; 4718 mqd->compute_pipelinestat_enable = 0x00000001;
4723 mqd->compute_static_thread_mgmt_se0 = 0xffffffff; 4719 mqd->compute_static_thread_mgmt_se0 = 0xffffffff;
@@ -4725,7 +4721,12 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
4725 mqd->compute_static_thread_mgmt_se2 = 0xffffffff; 4721 mqd->compute_static_thread_mgmt_se2 = 0xffffffff;
4726 mqd->compute_static_thread_mgmt_se3 = 0xffffffff; 4722 mqd->compute_static_thread_mgmt_se3 = 0xffffffff;
4727 mqd->compute_misc_reserved = 0x00000003; 4723 mqd->compute_misc_reserved = 0x00000003;
4728 4724 if (!(adev->flags & AMD_IS_APU)) {
4725 mqd->dynamic_cu_mask_addr_lo = lower_32_bits(ring->mqd_gpu_addr
4726 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4727 mqd->dynamic_cu_mask_addr_hi = upper_32_bits(ring->mqd_gpu_addr
4728 + offsetof(struct vi_mqd_allocation, dyamic_cu_mask));
4729 }
4729 eop_base_addr = ring->eop_gpu_addr >> 8; 4730 eop_base_addr = ring->eop_gpu_addr >> 8;
4730 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr; 4731 mqd->cp_hqd_eop_base_addr_lo = eop_base_addr;
4731 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); 4732 mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr);
@@ -4900,7 +4901,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4900 if (adev->gfx.in_reset) { /* for GPU_RESET case */ 4901 if (adev->gfx.in_reset) { /* for GPU_RESET case */
4901 /* reset MQD to a clean status */ 4902 /* reset MQD to a clean status */
4902 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4903 if (adev->gfx.mec.mqd_backup[mqd_idx])
4903 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4904 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4904 4905
4905 /* reset ring buffer */ 4906 /* reset ring buffer */
4906 ring->wptr = 0; 4907 ring->wptr = 0;
@@ -4916,6 +4917,9 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4916 vi_srbm_select(adev, 0, 0, 0, 0); 4917 vi_srbm_select(adev, 0, 0, 0, 0);
4917 mutex_unlock(&adev->srbm_mutex); 4918 mutex_unlock(&adev->srbm_mutex);
4918 } else { 4919 } else {
4920 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4921 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4922 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
4919 mutex_lock(&adev->srbm_mutex); 4923 mutex_lock(&adev->srbm_mutex);
4920 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4924 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4921 gfx_v8_0_mqd_init(ring); 4925 gfx_v8_0_mqd_init(ring);
@@ -4929,7 +4933,7 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
4929 mutex_unlock(&adev->srbm_mutex); 4933 mutex_unlock(&adev->srbm_mutex);
4930 4934
4931 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4935 if (adev->gfx.mec.mqd_backup[mqd_idx])
4932 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4936 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4933 } 4937 }
4934 4938
4935 return r; 4939 return r;
@@ -4947,6 +4951,9 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4947 int mqd_idx = ring - &adev->gfx.compute_ring[0]; 4951 int mqd_idx = ring - &adev->gfx.compute_ring[0];
4948 4952
4949 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) { 4953 if (!adev->gfx.in_reset && !adev->gfx.in_suspend) {
4954 memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
4955 ((struct vi_mqd_allocation *)mqd)->dyamic_cu_mask = 0xFFFFFFFF;
4956 ((struct vi_mqd_allocation *)mqd)->dyamic_rb_mask = 0xFFFFFFFF;
4950 mutex_lock(&adev->srbm_mutex); 4957 mutex_lock(&adev->srbm_mutex);
4951 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); 4958 vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
4952 gfx_v8_0_mqd_init(ring); 4959 gfx_v8_0_mqd_init(ring);
@@ -4954,11 +4961,11 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring)
4954 mutex_unlock(&adev->srbm_mutex); 4961 mutex_unlock(&adev->srbm_mutex);
4955 4962
4956 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4963 if (adev->gfx.mec.mqd_backup[mqd_idx])
4957 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(*mqd)); 4964 memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct vi_mqd_allocation));
4958 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */ 4965 } else if (adev->gfx.in_reset) { /* for GPU_RESET case */
4959 /* reset MQD to a clean status */ 4966 /* reset MQD to a clean status */
4960 if (adev->gfx.mec.mqd_backup[mqd_idx]) 4967 if (adev->gfx.mec.mqd_backup[mqd_idx])
4961 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(*mqd)); 4968 memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct vi_mqd_allocation));
4962 /* reset ring buffer */ 4969 /* reset ring buffer */
4963 ring->wptr = 0; 4970 ring->wptr = 0;
4964 amdgpu_ring_clear_ring(ring); 4971 amdgpu_ring_clear_ring(ring);
diff --git a/drivers/gpu/drm/amd/include/vi_structs.h b/drivers/gpu/drm/amd/include/vi_structs.h
index b68f8efcdeae..ca93b5160ba6 100644
--- a/drivers/gpu/drm/amd/include/vi_structs.h
+++ b/drivers/gpu/drm/amd/include/vi_structs.h
@@ -195,6 +195,274 @@ struct vi_mqd {
195 uint32_t compute_wave_restore_addr_lo; 195 uint32_t compute_wave_restore_addr_lo;
196 uint32_t compute_wave_restore_addr_hi; 196 uint32_t compute_wave_restore_addr_hi;
197 uint32_t compute_wave_restore_control; 197 uint32_t compute_wave_restore_control;
198 uint32_t reserved9;
199 uint32_t reserved10;
200 uint32_t reserved11;
201 uint32_t reserved12;
202 uint32_t reserved13;
203 uint32_t reserved14;
204 uint32_t reserved15;
205 uint32_t reserved16;
206 uint32_t reserved17;
207 uint32_t reserved18;
208 uint32_t reserved19;
209 uint32_t reserved20;
210 uint32_t reserved21;
211 uint32_t reserved22;
212 uint32_t reserved23;
213 uint32_t reserved24;
214 uint32_t reserved25;
215 uint32_t reserved26;
216 uint32_t reserved27;
217 uint32_t reserved28;
218 uint32_t reserved29;
219 uint32_t reserved30;
220 uint32_t reserved31;
221 uint32_t reserved32;
222 uint32_t reserved33;
223 uint32_t reserved34;
224 uint32_t compute_user_data_0;
225 uint32_t compute_user_data_1;
226 uint32_t compute_user_data_2;
227 uint32_t compute_user_data_3;
228 uint32_t compute_user_data_4;
229 uint32_t compute_user_data_5;
230 uint32_t compute_user_data_6;
231 uint32_t compute_user_data_7;
232 uint32_t compute_user_data_8;
233 uint32_t compute_user_data_9;
234 uint32_t compute_user_data_10;
235 uint32_t compute_user_data_11;
236 uint32_t compute_user_data_12;
237 uint32_t compute_user_data_13;
238 uint32_t compute_user_data_14;
239 uint32_t compute_user_data_15;
240 uint32_t cp_compute_csinvoc_count_lo;
241 uint32_t cp_compute_csinvoc_count_hi;
242 uint32_t reserved35;
243 uint32_t reserved36;
244 uint32_t reserved37;
245 uint32_t cp_mqd_query_time_lo;
246 uint32_t cp_mqd_query_time_hi;
247 uint32_t cp_mqd_connect_start_time_lo;
248 uint32_t cp_mqd_connect_start_time_hi;
249 uint32_t cp_mqd_connect_end_time_lo;
250 uint32_t cp_mqd_connect_end_time_hi;
251 uint32_t cp_mqd_connect_end_wf_count;
252 uint32_t cp_mqd_connect_end_pq_rptr;
253 uint32_t cp_mqd_connect_endvi_sdma_mqd_pq_wptr;
254 uint32_t cp_mqd_connect_end_ib_rptr;
255 uint32_t reserved38;
256 uint32_t reserved39;
257 uint32_t cp_mqd_save_start_time_lo;
258 uint32_t cp_mqd_save_start_time_hi;
259 uint32_t cp_mqd_save_end_time_lo;
260 uint32_t cp_mqd_save_end_time_hi;
261 uint32_t cp_mqd_restore_start_time_lo;
262 uint32_t cp_mqd_restore_start_time_hi;
263 uint32_t cp_mqd_restore_end_time_lo;
264 uint32_t cp_mqd_restore_end_time_hi;
265 uint32_t disable_queue;
266 uint32_t reserved41;
267 uint32_t gds_cs_ctxsw_cnt0;
268 uint32_t gds_cs_ctxsw_cnt1;
269 uint32_t gds_cs_ctxsw_cnt2;
270 uint32_t gds_cs_ctxsw_cnt3;
271 uint32_t reserved42;
272 uint32_t reserved43;
273 uint32_t cp_pq_exe_status_lo;
274 uint32_t cp_pq_exe_status_hi;
275 uint32_t cp_packet_id_lo;
276 uint32_t cp_packet_id_hi;
277 uint32_t cp_packet_exe_status_lo;
278 uint32_t cp_packet_exe_status_hi;
279 uint32_t gds_save_base_addr_lo;
280 uint32_t gds_save_base_addr_hi;
281 uint32_t gds_save_mask_lo;
282 uint32_t gds_save_mask_hi;
283 uint32_t ctx_save_base_addr_lo;
284 uint32_t ctx_save_base_addr_hi;
285 uint32_t dynamic_cu_mask_addr_lo;
286 uint32_t dynamic_cu_mask_addr_hi;
287 uint32_t cp_mqd_base_addr_lo;
288 uint32_t cp_mqd_base_addr_hi;
289 uint32_t cp_hqd_active;
290 uint32_t cp_hqd_vmid;
291 uint32_t cp_hqd_persistent_state;
292 uint32_t cp_hqd_pipe_priority;
293 uint32_t cp_hqd_queue_priority;
294 uint32_t cp_hqd_quantum;
295 uint32_t cp_hqd_pq_base_lo;
296 uint32_t cp_hqd_pq_base_hi;
297 uint32_t cp_hqd_pq_rptr;
298 uint32_t cp_hqd_pq_rptr_report_addr_lo;
299 uint32_t cp_hqd_pq_rptr_report_addr_hi;
300 uint32_t cp_hqd_pq_wptr_poll_addr_lo;
301 uint32_t cp_hqd_pq_wptr_poll_addr_hi;
302 uint32_t cp_hqd_pq_doorbell_control;
303 uint32_t cp_hqd_pq_wptr;
304 uint32_t cp_hqd_pq_control;
305 uint32_t cp_hqd_ib_base_addr_lo;
306 uint32_t cp_hqd_ib_base_addr_hi;
307 uint32_t cp_hqd_ib_rptr;
308 uint32_t cp_hqd_ib_control;
309 uint32_t cp_hqd_iq_timer;
310 uint32_t cp_hqd_iq_rptr;
311 uint32_t cp_hqd_dequeue_request;
312 uint32_t cp_hqd_dma_offload;
313 uint32_t cp_hqd_sema_cmd;
314 uint32_t cp_hqd_msg_type;
315 uint32_t cp_hqd_atomic0_preop_lo;
316 uint32_t cp_hqd_atomic0_preop_hi;
317 uint32_t cp_hqd_atomic1_preop_lo;
318 uint32_t cp_hqd_atomic1_preop_hi;
319 uint32_t cp_hqd_hq_status0;
320 uint32_t cp_hqd_hq_control0;
321 uint32_t cp_mqd_control;
322 uint32_t cp_hqd_hq_status1;
323 uint32_t cp_hqd_hq_control1;
324 uint32_t cp_hqd_eop_base_addr_lo;
325 uint32_t cp_hqd_eop_base_addr_hi;
326 uint32_t cp_hqd_eop_control;
327 uint32_t cp_hqd_eop_rptr;
328 uint32_t cp_hqd_eop_wptr;
329 uint32_t cp_hqd_eop_done_events;
330 uint32_t cp_hqd_ctx_save_base_addr_lo;
331 uint32_t cp_hqd_ctx_save_base_addr_hi;
332 uint32_t cp_hqd_ctx_save_control;
333 uint32_t cp_hqd_cntl_stack_offset;
334 uint32_t cp_hqd_cntl_stack_size;
335 uint32_t cp_hqd_wg_state_offset;
336 uint32_t cp_hqd_ctx_save_size;
337 uint32_t cp_hqd_gds_resource_state;
338 uint32_t cp_hqd_error;
339 uint32_t cp_hqd_eop_wptr_mem;
340 uint32_t cp_hqd_eop_dones;
341 uint32_t reserved46;
342 uint32_t reserved47;
343 uint32_t reserved48;
344 uint32_t reserved49;
345 uint32_t reserved50;
346 uint32_t reserved51;
347 uint32_t reserved52;
348 uint32_t reserved53;
349 uint32_t reserved54;
350 uint32_t reserved55;
351 uint32_t iqtimer_pkt_header;
352 uint32_t iqtimer_pkt_dw0;
353 uint32_t iqtimer_pkt_dw1;
354 uint32_t iqtimer_pkt_dw2;
355 uint32_t iqtimer_pkt_dw3;
356 uint32_t iqtimer_pkt_dw4;
357 uint32_t iqtimer_pkt_dw5;
358 uint32_t iqtimer_pkt_dw6;
359 uint32_t iqtimer_pkt_dw7;
360 uint32_t iqtimer_pkt_dw8;
361 uint32_t iqtimer_pkt_dw9;
362 uint32_t iqtimer_pkt_dw10;
363 uint32_t iqtimer_pkt_dw11;
364 uint32_t iqtimer_pkt_dw12;
365 uint32_t iqtimer_pkt_dw13;
366 uint32_t iqtimer_pkt_dw14;
367 uint32_t iqtimer_pkt_dw15;
368 uint32_t iqtimer_pkt_dw16;
369 uint32_t iqtimer_pkt_dw17;
370 uint32_t iqtimer_pkt_dw18;
371 uint32_t iqtimer_pkt_dw19;
372 uint32_t iqtimer_pkt_dw20;
373 uint32_t iqtimer_pkt_dw21;
374 uint32_t iqtimer_pkt_dw22;
375 uint32_t iqtimer_pkt_dw23;
376 uint32_t iqtimer_pkt_dw24;
377 uint32_t iqtimer_pkt_dw25;
378 uint32_t iqtimer_pkt_dw26;
379 uint32_t iqtimer_pkt_dw27;
380 uint32_t iqtimer_pkt_dw28;
381 uint32_t iqtimer_pkt_dw29;
382 uint32_t iqtimer_pkt_dw30;
383 uint32_t iqtimer_pkt_dw31;
384 uint32_t reserved56;
385 uint32_t reserved57;
386 uint32_t reserved58;
387 uint32_t set_resources_header;
388 uint32_t set_resources_dw1;
389 uint32_t set_resources_dw2;
390 uint32_t set_resources_dw3;
391 uint32_t set_resources_dw4;
392 uint32_t set_resources_dw5;
393 uint32_t set_resources_dw6;
394 uint32_t set_resources_dw7;
395 uint32_t reserved59;
396 uint32_t reserved60;
397 uint32_t reserved61;
398 uint32_t reserved62;
399 uint32_t reserved63;
400 uint32_t reserved64;
401 uint32_t reserved65;
402 uint32_t reserved66;
403 uint32_t reserved67;
404 uint32_t reserved68;
405 uint32_t reserved69;
406 uint32_t reserved70;
407 uint32_t reserved71;
408 uint32_t reserved72;
409 uint32_t reserved73;
410 uint32_t reserved74;
411 uint32_t reserved75;
412 uint32_t reserved76;
413 uint32_t reserved77;
414 uint32_t reserved78;
415 uint32_t reserved_t[256];
416};
417
418struct vi_mqd_allocation {
419 struct vi_mqd mqd;
420 uint32_t wptr_poll_mem;
421 uint32_t rptr_report_mem;
422 uint32_t dyamic_cu_mask;
423 uint32_t dyamic_rb_mask;
424};
425
426struct cz_mqd {
427 uint32_t header;
428 uint32_t compute_dispatch_initiator;
429 uint32_t compute_dim_x;
430 uint32_t compute_dim_y;
431 uint32_t compute_dim_z;
432 uint32_t compute_start_x;
433 uint32_t compute_start_y;
434 uint32_t compute_start_z;
435 uint32_t compute_num_thread_x;
436 uint32_t compute_num_thread_y;
437 uint32_t compute_num_thread_z;
438 uint32_t compute_pipelinestat_enable;
439 uint32_t compute_perfcount_enable;
440 uint32_t compute_pgm_lo;
441 uint32_t compute_pgm_hi;
442 uint32_t compute_tba_lo;
443 uint32_t compute_tba_hi;
444 uint32_t compute_tma_lo;
445 uint32_t compute_tma_hi;
446 uint32_t compute_pgm_rsrc1;
447 uint32_t compute_pgm_rsrc2;
448 uint32_t compute_vmid;
449 uint32_t compute_resource_limits;
450 uint32_t compute_static_thread_mgmt_se0;
451 uint32_t compute_static_thread_mgmt_se1;
452 uint32_t compute_tmpring_size;
453 uint32_t compute_static_thread_mgmt_se2;
454 uint32_t compute_static_thread_mgmt_se3;
455 uint32_t compute_restart_x;
456 uint32_t compute_restart_y;
457 uint32_t compute_restart_z;
458 uint32_t compute_thread_trace_enable;
459 uint32_t compute_misc_reserved;
460 uint32_t compute_dispatch_id;
461 uint32_t compute_threadgroup_id;
462 uint32_t compute_relaunch;
463 uint32_t compute_wave_restore_addr_lo;
464 uint32_t compute_wave_restore_addr_hi;
465 uint32_t compute_wave_restore_control;
198 uint32_t reserved_39; 466 uint32_t reserved_39;
199 uint32_t reserved_40; 467 uint32_t reserved_40;
200 uint32_t reserved_41; 468 uint32_t reserved_41;