author      Dave Airlie <airlied@redhat.com>    2019-01-30 23:19:45 -0500
committer   Dave Airlie <airlied@redhat.com>    2019-01-31 18:34:20 -0500
commit      e09191d360ab54c7ac7a9b09287dcab81c30d8e3 (patch)
tree        be0c0a0ed2f0603ecaa430bd09811484adc04ee6 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent      f91168f48556486743392b8838e20afbd84b7b7a (diff)
parent      10117450735c7a7c0858095fb46a860e7037cb9a (diff)
Merge branch 'drm-next-5.1' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.1.

amdgpu:
- DC bandwidth formula updates
- Support for DCC on scanout surfaces
- Support for multiple IH rings on soc15 asics
- Fix xgmi locking
- Add sysfs interface to get pcie usage stats
- Simplify DC i2c/aux code
- Initial support for BACO on vega10/20
- New runtime SMU feature debug interface
- Expand existing sysfs power interfaces to new clock domains
- Handle kexec properly
- Simplify IH programming
- Rework doorbell handling across asics
- Drop old CI DPM implementation
- DC page flipping fixes
- Misc SR-IOV fixes

amdkfd:
- Simplify the interfaces between amdkfd and amdgpu

ttm:
- Add a callback to notify the driver when the lru changes

sched:
- Refactor mirror list handling
- Rework hw fence processing

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190125231517.26268-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r--    drivers/gpu/drm/amd/amdgpu/amdgpu_device.c    77
1 file changed, 43 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 28bccceaa363..c898b19f335a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1645,7 +1645,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		if (r) {
 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
 				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+			goto init_failed;
 		}
 		adev->ip_blocks[i].status.sw = true;
 
@@ -1654,17 +1654,17 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			r = amdgpu_device_vram_scratch_init(adev);
 			if (r) {
 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
-				return r;
+				goto init_failed;
 			}
 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 			if (r) {
 				DRM_ERROR("hw_init %d failed %d\n", i, r);
-				return r;
+				goto init_failed;
 			}
 			r = amdgpu_device_wb_init(adev);
 			if (r) {
 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
-				return r;
+				goto init_failed;
 			}
 			adev->ip_blocks[i].status.hw = true;
 
@@ -1675,7 +1675,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 							AMDGPU_CSA_SIZE);
 				if (r) {
 					DRM_ERROR("allocate CSA failed %d\n", r);
-					return r;
+					goto init_failed;
 				}
 			}
 		}
@@ -1683,28 +1683,32 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 
 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
 	if (r)
-		return r;
+		goto init_failed;
 
 	r = amdgpu_device_ip_hw_init_phase1(adev);
 	if (r)
-		return r;
+		goto init_failed;
 
 	r = amdgpu_device_fw_loading(adev);
 	if (r)
-		return r;
+		goto init_failed;
 
 	r = amdgpu_device_ip_hw_init_phase2(adev);
 	if (r)
-		return r;
+		goto init_failed;
 
 	if (adev->gmc.xgmi.num_physical_nodes > 1)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
-	if (amdgpu_sriov_vf(adev))
+init_failed:
+	if (amdgpu_sriov_vf(adev)) {
+		if (!r)
+			amdgpu_virt_init_data_exchange(adev);
 		amdgpu_virt_release_full_gpu(adev, true);
+	}
 
-	return 0;
+	return r;
 }
 
 /**
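The hunks above convert every early return in amdgpu_device_ip_init() into a jump to a single init_failed label, so the SR-IOV release (amdgpu_virt_release_full_gpu()) now runs on failure paths too, and the data exchange is only initialized when init actually succeeded (r == 0). A minimal, self-contained sketch of this single-exit pattern, with step_a(), step_b() and release_gpu() as hypothetical stand-ins for the driver's init stages, not amdgpu API:

/* Sketch of the single-exit cleanup pattern; the names are hypothetical. */
#include <stdio.h>

static int step_a(void) { return 0; }
static int step_b(void) { return -1; }	/* simulate a mid-init failure */

static void release_gpu(int init_ok)
{
	/* In the driver this is the SR-IOV release that the old
	 * "return r;" statements skipped; the label makes it run
	 * on every exit path. */
	printf("release_gpu (init %s)\n", init_ok ? "ok" : "failed");
}

static int device_init(void)
{
	int r;

	r = step_a();
	if (r)
		goto init_failed;	/* was: return r; */

	r = step_b();
	if (r)
		goto init_failed;

init_failed:
	release_gpu(r == 0);	/* reached on success and failure alike */
	return r;		/* was: return 0; */
}

int main(void)
{
	return device_init() ? 1 : 0;
}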
@@ -2131,7 +2135,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
 			continue;
 
 		r = block->version->funcs->hw_init(adev);
-		DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+		DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 		if (r)
 			return r;
 	}
@@ -2165,7 +2169,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 			continue;
 
 		r = block->version->funcs->hw_init(adev);
-		DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+		DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 		if (r)
 			return r;
 	}
@@ -2546,6 +2550,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* detect if we are with an SRIOV vbios */
 	amdgpu_device_detect_sriov_bios(adev);
 
+	/* check if we need to reset the asic
+	 * E.g., driver was not cleanly unloaded previously, etc.
+	 */
+	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
+		r = amdgpu_asic_reset(adev);
+		if (r) {
+			dev_err(adev->dev, "asic reset on init failed\n");
+			goto failed;
+		}
+	}
+
 	/* Post card if necessary */
 	if (amdgpu_device_need_post(adev)) {
 		if (!adev->bios) {
@@ -2610,6 +2625,8 @@ fence_driver_init:
 		}
 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+		if (amdgpu_virt_request_full_gpu(adev, false))
+			amdgpu_virt_release_full_gpu(adev, false);
 		goto failed;
 	}
 
@@ -2632,9 +2649,6 @@ fence_driver_init:
 		goto failed;
 	}
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_init_data_exchange(adev);
-
 	amdgpu_fbdev_init(adev);
 
 	r = amdgpu_pm_sysfs_init(adev);
@@ -2798,7 +2812,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 		struct drm_framebuffer *fb = crtc->primary->fb;
 		struct amdgpu_bo *robj;
 
-		if (amdgpu_crtc->cursor_bo) {
+		if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
 			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
@@ -2906,7 +2920,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
-		if (amdgpu_crtc->cursor_bo) {
+		if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
 			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
@@ -3226,6 +3240,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	r = amdgpu_ib_ring_tests(adev);
 
 error:
+	amdgpu_virt_init_data_exchange(adev);
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		atomic_inc(&adev->vram_lost_counter);
@@ -3298,17 +3313,15 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		kthread_park(ring->sched.thread);
-
-		if (job && job->base.sched != &ring->sched)
-			continue;
-
-		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
+		drm_sched_stop(&ring->sched);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
 
+	if(job)
+		drm_sched_increase_karma(&job->base);
+
 
 
 	if (!amdgpu_sriov_vf(adev)) {
@@ -3454,14 +3467,10 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		/* only need recovery sched of the given job's ring
-		 * or all rings (in the case @job is NULL)
-		 * after above amdgpu_reset accomplished
-		 */
-		if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
-			drm_sched_job_recovery(&ring->sched);
+		if (!adev->asic_reset_res)
+			drm_sched_resubmit_jobs(&ring->sched);
 
-		kthread_unpark(ring->sched.thread);
+		drm_sched_start(&ring->sched, !adev->asic_reset_res);
 	}
 
 	if (!amdgpu_device_has_dc_support(adev)) {
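Together, the pre- and post-reset hunks above replace the old per-ring park/recover sequence (kthread_park(), drm_sched_hw_job_reset(), drm_sched_job_recovery(), kthread_unpark()) with the refactored drm_sched entry points, and move guilt tracking to a single drm_sched_increase_karma() call instead of filtering rings against the bad job. A condensed sketch of the new call order, using only the calls visible in the hunks; the actual asic reset and error handling are elided, so this is an illustration, not a drop-in replacement:

/* Condensed sketch of the reworked recovery flow; "job" may be NULL
 * when the reset was not triggered by a specific hang. */
static void sketch_gpu_recover(struct amdgpu_device *adev,
			       struct amdgpu_job *job)
{
	int i;

	/* Pre-reset: stop every scheduler. drm_sched_stop() subsumes the
	 * old kthread_park() + drm_sched_hw_job_reset() pair. */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		drm_sched_stop(&ring->sched);
		/* hw fences are meaningless after reset; force-complete them */
		amdgpu_fence_driver_force_completion(ring);
	}

	/* Blame the offending job once, instead of per-ring filtering. */
	if (job)
		drm_sched_increase_karma(&job->base);

	/* ... asic reset runs here, recording adev->asic_reset_res ... */

	/* Post-reset: resubmit the mirror-list jobs only if the reset
	 * succeeded, then restart each scheduler. */
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		if (!ring || !ring->sched.thread)
			continue;
		if (!adev->asic_reset_res)
			drm_sched_resubmit_jobs(&ring->sched);
		drm_sched_start(&ring->sched, !adev->asic_reset_res);
	}
}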
@@ -3521,9 +3530,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 * by different nodes. No point also since the one node already executing
 	 * reset will also reset all the other nodes in the hive.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev);
+	hive = amdgpu_get_xgmi_hive(adev, 0);
 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    !mutex_trylock(&hive->hive_lock))
+	    !mutex_trylock(&hive->reset_lock))
 		return 0;
 
 	/* Start with adev pre asic reset first for soft reset check.*/
@@ -3602,7 +3611,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
 	}
 
 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
-		mutex_unlock(&hive->hive_lock);
+		mutex_unlock(&hive->reset_lock);
 
 	if (r)
 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);