commit     e09191d360ab54c7ac7a9b09287dcab81c30d8e3
author     Dave Airlie <airlied@redhat.com>    2019-01-30 23:19:45 -0500
committer  Dave Airlie <airlied@redhat.com>    2019-01-31 18:34:20 -0500
tree       be0c0a0ed2f0603ecaa430bd09811484adc04ee6 /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent     f91168f48556486743392b8838e20afbd84b7b7a
parent     10117450735c7a7c0858095fb46a860e7037cb9a
Merge branch 'drm-next-5.1' of git://people.freedesktop.org/~agd5f/linux into drm-next
New stuff for 5.1.
amdgpu:
- DC bandwidth formula updates
- Support for DCC on scanout surfaces
- Support for multiple IH rings on soc15 asics
- Fix xgmi locking
- Add sysfs interface to get pcie usage stats
- Simplify DC i2c/aux code
- Initial support for BACO on vega10/20
- New runtime SMU feature debug interface
- Expand existing sysfs power interfaces to new clock domains
- Handle kexec properly
- Simplify IH programming
- Rework doorbell handling across asics
- Drop old CI DPM implementation
- DC page flipping fixes
- Misc SR-IOV fixes
amdkfd:
- Simplify the interfaces between amdkfd and amdgpu
ttm:
- Add a callback to notify the driver when the lru changes
sched:
- Refactor mirror list handling
- Rework hw fence processing
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190125231517.26268-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
 -rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 77
 1 file changed, 43 insertions(+), 34 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 28bccceaa363..c898b19f335a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1645,7 +1645,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		if (r) {
 			DRM_ERROR("sw_init of IP block <%s> failed %d\n",
 				  adev->ip_blocks[i].version->funcs->name, r);
-			return r;
+			goto init_failed;
 		}
 		adev->ip_blocks[i].status.sw = true;
 
@@ -1654,17 +1654,17 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			r = amdgpu_device_vram_scratch_init(adev);
 			if (r) {
 				DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
-				return r;
+				goto init_failed;
 			}
 			r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
 			if (r) {
 				DRM_ERROR("hw_init %d failed %d\n", i, r);
-				return r;
+				goto init_failed;
 			}
 			r = amdgpu_device_wb_init(adev);
 			if (r) {
 				DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
-				return r;
+				goto init_failed;
 			}
 			adev->ip_blocks[i].status.hw = true;
 
@@ -1675,7 +1675,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 							       AMDGPU_CSA_SIZE);
 				if (r) {
 					DRM_ERROR("allocate CSA failed %d\n", r);
-					return r;
+					goto init_failed;
 				}
 			}
 		}
@@ -1683,28 +1683,32 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 
 	r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
 	if (r)
-		return r;
+		goto init_failed;
 
 	r = amdgpu_device_ip_hw_init_phase1(adev);
 	if (r)
-		return r;
+		goto init_failed;
 
 	r = amdgpu_device_fw_loading(adev);
 	if (r)
-		return r;
+		goto init_failed;
 
 	r = amdgpu_device_ip_hw_init_phase2(adev);
 	if (r)
-		return r;
+		goto init_failed;
 
 	if (adev->gmc.xgmi.num_physical_nodes > 1)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
-	if (amdgpu_sriov_vf(adev))
+init_failed:
+	if (amdgpu_sriov_vf(adev)) {
+		if (!r)
+			amdgpu_virt_init_data_exchange(adev);
 		amdgpu_virt_release_full_gpu(adev, true);
+	}
 
-	return 0;
+	return r;
 }
 
 /**
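
The hunks above all serve the same fix: every early "return r;" in
amdgpu_device_ip_init() becomes "goto init_failed;", so the function has a
single exit where, under SR-IOV, full GPU access is always handed back to
the host, and guest/host data exchange is only brought up when init actually
succeeded. A minimal standalone sketch of this single-exit pattern (the
helpers are hypothetical stand-ins, not driver functions):

    #include <stdio.h>

    /* Hypothetical stand-ins for the driver's init steps. */
    static int step_a(void) { return 0; }
    static int step_b(void) { return -1; }  /* pretend this step fails */
    static void on_success_only(void) { puts("data exchange up"); }
    static void release_resources(void) { puts("full GPU released"); }

    static int init_with_single_exit(void)
    {
        int r;

        r = step_a();
        if (r)
            goto init_failed;
        r = step_b();
        if (r)
            goto init_failed;

    init_failed:
        if (!r)
            on_success_only();   /* only when every step succeeded */
        release_resources();     /* runs on success and failure alike */
        return r;
    }

    int main(void)
    {
        return init_with_single_exit() ? 1 : 0;
    }
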
@@ -2131,7 +2135,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
 			continue;
 
 		r = block->version->funcs->hw_init(adev);
-		DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+		DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 		if (r)
 			return r;
 	}
@@ -2165,7 +2169,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
 			continue;
 
 		r = block->version->funcs->hw_init(adev);
-		DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
+		DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
 		if (r)
 			return r;
 	}
@@ -2546,6 +2550,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	/* detect if we are with an SRIOV vbios */
 	amdgpu_device_detect_sriov_bios(adev);
 
+	/* check if we need to reset the asic
+	 *  E.g., driver was not cleanly unloaded previously, etc.
+	 */
+	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
+		r = amdgpu_asic_reset(adev);
+		if (r) {
+			dev_err(adev->dev, "asic reset on init failed\n");
+			goto failed;
+		}
+	}
+
 	/* Post card if necessary */
 	if (amdgpu_device_need_post(adev)) {
 		if (!adev->bios) {
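
The added block makes bare-metal bring-up reset the ASIC first when the
hardware is in an unknown state ("Handle kexec properly" in the log above);
SR-IOV VFs skip it because the host owns resets. A hedged sketch of the same
probe-time guard, with hypothetical helpers standing in for the per-ASIC
callbacks:

    #include <stdbool.h>
    #include <stdio.h>

    /* Hypothetical stand-ins for amdgpu_asic_need_reset_on_init() and
     * amdgpu_asic_reset(); the real checks live in per-ASIC callbacks. */
    static bool hw_left_running(void) { return true; }  /* e.g. after kexec */
    static int hw_reset(void) { return 0; }

    /* Reset before touching possibly-live hardware, then init as usual. */
    static int probe(void)
    {
        if (hw_left_running()) {
            int r = hw_reset();
            if (r) {
                fprintf(stderr, "asic reset on init failed\n");
                return r;
            }
        }
        /* ...continue normal init on known-clean hardware... */
        return 0;
    }

    int main(void) { return probe(); }
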
@@ -2610,6 +2625,8 @@ fence_driver_init:
 		}
 		dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
+		if (amdgpu_virt_request_full_gpu(adev, false))
+			amdgpu_virt_release_full_gpu(adev, false);
 		goto failed;
 	}
 
@@ -2632,9 +2649,6 @@ fence_driver_init:
 		goto failed;
 	}
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_init_data_exchange(adev);
-
 	amdgpu_fbdev_init(adev);
 
 	r = amdgpu_pm_sysfs_init(adev);
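
Note that the data-exchange setup removed here is not lost: the init_failed
hunk earlier in this diff moves amdgpu_virt_init_data_exchange() into
amdgpu_device_ip_init() itself, where it runs on success while the VF still
holds full GPU access.
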
@@ -2798,7 +2812,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 		struct drm_framebuffer *fb = crtc->primary->fb;
 		struct amdgpu_bo *robj;
 
-		if (amdgpu_crtc->cursor_bo) {
+		if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
 			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
@@ -2906,7 +2920,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
-		if (amdgpu_crtc->cursor_bo) {
+		if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
 			struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
 			r = amdgpu_bo_reserve(aobj, true);
 			if (r == 0) {
@@ -3226,6 +3240,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	r = amdgpu_ib_ring_tests(adev);
 
 error:
+	amdgpu_virt_init_data_exchange(adev);
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		atomic_inc(&adev->vram_lost_counter);
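
On the SR-IOV reset path the same call is added on the error: label, so
VF-to-host data exchange is re-established after a function-level reset
before full GPU access is released; the label is reached on both the success
and failure paths.
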
@@ -3298,17 +3313,15 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		kthread_park(ring->sched.thread);
-
-		if (job && job->base.sched != &ring->sched)
-			continue;
-
-		drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
+		drm_sched_stop(&ring->sched);
 
 		/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
 		amdgpu_fence_driver_force_completion(ring);
 	}
 
+	if(job)
+		drm_sched_increase_karma(&job->base);
+
 
 
 	if (!amdgpu_sriov_vf(adev)) {
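
This hunk and the post-reset one below move amdgpu from the old
kthread_park()/drm_sched_hw_job_reset()/drm_sched_job_recovery() flow to the
reworked scheduler API this merge pulls in ("Refactor mirror list handling",
"Rework hw fence processing" in the log above). A condensed sketch of the
resulting sequence for one scheduler, using only the calls visible in this
diff (the helper is hypothetical and the reset step itself is elided):

    #include <drm/gpu_scheduler.h>

    /* Sketch: per-scheduler recovery around an asic reset. */
    static void recover_one_sched(struct drm_gpu_scheduler *sched,
                                  struct drm_sched_job *bad_job,
                                  int asic_reset_res)
    {
        drm_sched_stop(sched);              /* quiesce the scheduler */
        /* force-complete hw fences here: meaningless after the reset */

        if (bad_job)
            drm_sched_increase_karma(bad_job);  /* blame the hung job */

        /* ...perform the asic reset between stop and start... */

        if (!asic_reset_res)
            drm_sched_resubmit_jobs(sched);     /* replay the mirror list */
        drm_sched_start(sched, !asic_reset_res);
    }
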
@@ -3454,14 +3467,10 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
 		if (!ring || !ring->sched.thread)
 			continue;
 
-		/* only need recovery sched of the given job's ring
-		 * or all rings (in the case @job is NULL)
-		 * after above amdgpu_reset accomplished
-		 */
-		if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
-			drm_sched_job_recovery(&ring->sched);
+		if (!adev->asic_reset_res)
+			drm_sched_resubmit_jobs(&ring->sched);
 
-		kthread_unpark(ring->sched.thread);
+		drm_sched_start(&ring->sched, !adev->asic_reset_res);
 	}
 
 	if (!amdgpu_device_has_dc_support(adev)) {
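
Design note: drm_sched_start()'s second argument selects full recovery.
Passing !adev->asic_reset_res re-arms hardware-fence callbacks and timeouts
only when the reset succeeded, and resubmission of the mirror list is
likewise skipped on a failed reset; the scheduler itself is restarted either
way.
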
@@ -3521,9 +3530,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	 * by different nodes. No point also since the one node already executing
 	 * reset will also reset all the other nodes in the hive.
 	 */
-	hive = amdgpu_get_xgmi_hive(adev);
+	hive = amdgpu_get_xgmi_hive(adev, 0);
 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
-	    !mutex_trylock(&hive->hive_lock))
+	    !mutex_trylock(&hive->reset_lock))
 		return 0;
 
 	/* Start with adev pre asic reset first for soft reset check.*/
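
The locking change gives XGMI hives a dedicated reset_lock, taken with
mutex_trylock(): any node's reset brings down the whole hive, so a node that
loses the race simply returns instead of queueing a redundant reset. The
shape of that guard, as a sketch (kernel context assumed, helper name
hypothetical):

    #include <linux/mutex.h>

    /* Only one node in the hive runs the reset; late-comers bail out. */
    static int hive_guarded_reset(struct mutex *reset_lock)
    {
        if (!mutex_trylock(reset_lock))
            return 0;  /* another node is already resetting the hive */

        /* ...reset every device in the hive... */

        mutex_unlock(reset_lock);
        return 0;
    }
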
@@ -3602,7 +3611,7 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 	}
 
 	if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
-		mutex_unlock(&hive->hive_lock);
+		mutex_unlock(&hive->reset_lock);
 
 	if (r)
 		dev_info(adev->dev, "GPU reset end with ret = %d\n", r);