aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c2
8 files changed, 19 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c31c5496dc5e..ffbe99d839a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se;
126extern int amdgpu_job_hang_limit; 126extern int amdgpu_job_hang_limit;
127extern int amdgpu_lbpw; 127extern int amdgpu_lbpw;
128extern int amdgpu_compute_multipipe; 128extern int amdgpu_compute_multipipe;
129extern int amdgpu_gpu_recovery;
129 130
130#ifdef CONFIG_DRM_AMDGPU_SI 131#ifdef CONFIG_DRM_AMDGPU_SI
131extern int amdgpu_si_support; 132extern int amdgpu_si_support;
@@ -1910,7 +1911,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
1910#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) 1911#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i))
1911 1912
1912/* Common functions */ 1913/* Common functions */
1913int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); 1914int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force);
1914bool amdgpu_need_backup(struct amdgpu_device *adev); 1915bool amdgpu_need_backup(struct amdgpu_device *adev);
1915void amdgpu_pci_config_reset(struct amdgpu_device *adev); 1916void amdgpu_pci_config_reset(struct amdgpu_device *adev);
1916bool amdgpu_need_post(struct amdgpu_device *adev); 1917bool amdgpu_need_post(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 046b9d5bc14d..3f63f5ca4fa7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3009,11 +3009,12 @@ error:
3009 * 3009 *
3010 * @adev: amdgpu device pointer 3010 * @adev: amdgpu device pointer
3011 * @job: which job trigger hang 3011 * @job: which job trigger hang
3012 * @force: forces reset regardless of amdgpu_gpu_recovery
3012 * 3013 *
3013 * Attempt to reset the GPU if it has hung (all asics). 3014 * Attempt to reset the GPU if it has hung (all asics).
3014 * Returns 0 for success or an error on failure. 3015 * Returns 0 for success or an error on failure.
3015 */ 3016 */
3016int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) 3017int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force)
3017{ 3018{
3018 struct drm_atomic_state *state = NULL; 3019 struct drm_atomic_state *state = NULL;
3019 uint64_t reset_flags = 0; 3020 uint64_t reset_flags = 0;
@@ -3024,6 +3025,12 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job)
3024 return 0; 3025 return 0;
3025 } 3026 }
3026 3027
3028 if (!force && (amdgpu_gpu_recovery == 0 ||
3029 (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) {
3030 DRM_INFO("GPU recovery disabled.\n");
3031 return 0;
3032 }
3033
3027 dev_info(adev->dev, "GPU reset begin!\n"); 3034 dev_info(adev->dev, "GPU reset begin!\n");
3028 3035
3029 mutex_lock(&adev->lock_reset); 3036 mutex_lock(&adev->lock_reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 0b039bdcf84e..b734cd668ff1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0;
128int amdgpu_job_hang_limit = 0; 128int amdgpu_job_hang_limit = 0;
129int amdgpu_lbpw = -1; 129int amdgpu_lbpw = -1;
130int amdgpu_compute_multipipe = -1; 130int amdgpu_compute_multipipe = -1;
131int amdgpu_gpu_recovery = -1; /* auto */
131 132
132MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); 133MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
133module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); 134module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444);
280MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); 281MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
281module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); 282module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
282 283
284MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto");
285module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
286
283#ifdef CONFIG_DRM_AMDGPU_SI 287#ifdef CONFIG_DRM_AMDGPU_SI
284 288
285#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) 289#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 7cb71a8e21df..d3ce12149542 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -705,7 +705,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data)
705 struct amdgpu_device *adev = dev->dev_private; 705 struct amdgpu_device *adev = dev->dev_private;
706 706
707 seq_printf(m, "gpu recover\n"); 707 seq_printf(m, "gpu recover\n");
708 amdgpu_gpu_recover(adev, NULL); 708 amdgpu_gpu_recover(adev, NULL, true);
709 709
710 return 0; 710 return 0;
711} 711}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index c340774082ea..c43643e8c8c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
88 reset_work); 88 reset_work);
89 89
90 if (!amdgpu_sriov_vf(adev)) 90 if (!amdgpu_sriov_vf(adev))
91 amdgpu_gpu_recover(adev, NULL); 91 amdgpu_gpu_recover(adev, NULL, false);
92} 92}
93 93
94/* Disable *all* interrupts */ 94/* Disable *all* interrupts */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 013c0a8cfb60..be8a437fad54 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
37 atomic_read(&job->ring->fence_drv.last_seq), 37 atomic_read(&job->ring->fence_drv.last_seq),
38 job->ring->fence_drv.sync_seq); 38 job->ring->fence_drv.sync_seq);
39 39
40 amdgpu_gpu_recover(job->adev, job); 40 amdgpu_gpu_recover(job->adev, job, false);
41} 41}
42 42
43int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, 43int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 71f56900d6fe..7ade56d59c27 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -253,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
253 } 253 }
254 254
255 /* Trigger recovery due to world switch failure */ 255 /* Trigger recovery due to world switch failure */
256 amdgpu_gpu_recover(adev, NULL); 256 amdgpu_gpu_recover(adev, NULL, false);
257} 257}
258 258
259static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, 259static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
index df52824c0cd4..e05823d86cfb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
521 } 521 }
522 522
523 /* Trigger recovery due to world switch failure */ 523 /* Trigger recovery due to world switch failure */
524 amdgpu_gpu_recover(adev, NULL); 524 amdgpu_gpu_recover(adev, NULL, false);
525} 525}
526 526
527static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, 527static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,