aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
diff options
context:
space:
mode:
author: Dave Airlie <airlied@redhat.com> 2018-12-12 18:49:04 -0500
committer: Dave Airlie <airlied@redhat.com> 2018-12-12 19:06:34 -0500
commit: e7df065a697783ecb5c6eaa5692d78dcfceb71dd (patch)
tree: 2e42f7ec8ac73c13ffd6575e016dad1beb4de83b /drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
parent: e69aa5f9b97f7f871643336deb281db5cb14878b (diff)
parent: 674e78acae0dfb4beb56132e41cbae5b60f7d662 (diff)
Merge branch 'drm-next-4.21' of git://people.freedesktop.org/~agd5f/linux into drm-next
[airlied: make etnaviv build again]

amdgpu:
- DC trace support
- More DC documentation
- XGMI hive reset support
- Rework IH interaction with KFD
- Misc fixes and cleanups
- Powerplay updates for newer polaris variants
- Add cursor plane update fast path
- Enable gpu reset by default on CI parts
- Fix config with KFD/HSA not enabled

amdkfd:
- Limit vram overcommit
- dmabuf support
- Support for doorbell BOs

ttm:
- Support for simultaneous submissions to multiple engines

scheduler:
- Add helpers for hw with preemption support

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181207233119.16861-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 52
1 files changed, 46 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c75badfa5c4c..b60afeade50a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -515,7 +515,6 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
515 */ 515 */
516static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) 516static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
517{ 517{
518 amdgpu_asic_init_doorbell_index(adev);
519 518
520 /* No doorbell on SI hardware generation */ 519 /* No doorbell on SI hardware generation */
521 if (adev->asic_type < CHIP_BONAIRE) { 520 if (adev->asic_type < CHIP_BONAIRE) {
@@ -529,6 +528,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
529 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) 528 if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
530 return -EINVAL; 529 return -EINVAL;
531 530
531 amdgpu_asic_init_doorbell_index(adev);
532
532 /* doorbell bar mapping */ 533 /* doorbell bar mapping */
533 adev->doorbell.base = pci_resource_start(adev->pdev, 2); 534 adev->doorbell.base = pci_resource_start(adev->pdev, 2);
534 adev->doorbell.size = pci_resource_len(adev->pdev, 2); 535 adev->doorbell.size = pci_resource_len(adev->pdev, 2);
@@ -1864,6 +1865,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
1864{ 1865{
1865 int i, r; 1866 int i, r;
1866 1867
1868 if (adev->gmc.xgmi.num_physical_nodes > 1)
1869 amdgpu_xgmi_remove_device(adev);
1870
1867 amdgpu_amdkfd_device_fini(adev); 1871 amdgpu_amdkfd_device_fini(adev);
1868 1872
1869 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); 1873 amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
@@ -2353,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
2353 return amdgpu_device_asic_has_dc_support(adev->asic_type); 2357 return amdgpu_device_asic_has_dc_support(adev->asic_type);
2354} 2358}
2355 2359
2360
2361static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
2362{
2363 struct amdgpu_device *adev =
2364 container_of(__work, struct amdgpu_device, xgmi_reset_work);
2365
2366 adev->asic_reset_res = amdgpu_asic_reset(adev);
2367 if (adev->asic_reset_res)
2368 DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
2369 adev->asic_reset_res, adev->ddev->unique);
2370}
2371
2372
2356/** 2373/**
2357 * amdgpu_device_init - initialize the driver 2374 * amdgpu_device_init - initialize the driver
2358 * 2375 *
@@ -2451,6 +2468,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
2451 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, 2468 INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
2452 amdgpu_device_delay_enable_gfx_off); 2469 amdgpu_device_delay_enable_gfx_off);
2453 2470
2471 INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
2472
2454 adev->gfx.gfx_off_req_count = 1; 2473 adev->gfx.gfx_off_req_count = 1;
2455 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; 2474 adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
2456 2475
@@ -3239,6 +3258,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
3239 3258
3240 if (amdgpu_gpu_recovery == -1) { 3259 if (amdgpu_gpu_recovery == -1) {
3241 switch (adev->asic_type) { 3260 switch (adev->asic_type) {
3261 case CHIP_BONAIRE:
3262 case CHIP_HAWAII:
3242 case CHIP_TOPAZ: 3263 case CHIP_TOPAZ:
3243 case CHIP_TONGA: 3264 case CHIP_TONGA:
3244 case CHIP_FIJI: 3265 case CHIP_FIJI:
@@ -3328,10 +3349,31 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
3328 */ 3349 */
3329 if (need_full_reset) { 3350 if (need_full_reset) {
3330 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { 3351 list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
3331 r = amdgpu_asic_reset(tmp_adev); 3352 /* For XGMI run all resets in parallel to speed up the process */
3332 if (r) 3353 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3333 DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", 3354 if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
3355 r = -EALREADY;
3356 } else
3357 r = amdgpu_asic_reset(tmp_adev);
3358
3359 if (r) {
3360 DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
3334 r, tmp_adev->ddev->unique); 3361 r, tmp_adev->ddev->unique);
3362 break;
3363 }
3364 }
3365
3366 /* For XGMI wait for all PSP resets to complete before proceed */
3367 if (!r) {
3368 list_for_each_entry(tmp_adev, device_list_handle,
3369 gmc.xgmi.head) {
3370 if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
3371 flush_work(&tmp_adev->xgmi_reset_work);
3372 r = tmp_adev->asic_reset_res;
3373 if (r)
3374 break;
3375 }
3376 }
3335 } 3377 }
3336 } 3378 }
3337 3379
@@ -3518,8 +3560,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
3518 if (tmp_adev == adev) 3560 if (tmp_adev == adev)
3519 continue; 3561 continue;
3520 3562
3521 dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique);
3522
3523 amdgpu_device_lock_adev(tmp_adev); 3563 amdgpu_device_lock_adev(tmp_adev);
3524 r = amdgpu_device_pre_asic_reset(tmp_adev, 3564 r = amdgpu_device_pre_asic_reset(tmp_adev,
3525 NULL, 3565 NULL,