diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | 2 |
8 files changed, 19 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c31c5496dc5e..ffbe99d839a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h | |||
@@ -126,6 +126,7 @@ extern int amdgpu_param_buf_per_se; | |||
126 | extern int amdgpu_job_hang_limit; | 126 | extern int amdgpu_job_hang_limit; |
127 | extern int amdgpu_lbpw; | 127 | extern int amdgpu_lbpw; |
128 | extern int amdgpu_compute_multipipe; | 128 | extern int amdgpu_compute_multipipe; |
129 | extern int amdgpu_gpu_recovery; | ||
129 | 130 | ||
130 | #ifdef CONFIG_DRM_AMDGPU_SI | 131 | #ifdef CONFIG_DRM_AMDGPU_SI |
131 | extern int amdgpu_si_support; | 132 | extern int amdgpu_si_support; |
@@ -1910,7 +1911,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) | |||
1910 | #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) | 1911 | #define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) |
1911 | 1912 | ||
1912 | /* Common functions */ | 1913 | /* Common functions */ |
1913 | int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job); | 1914 | int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job* job, bool force); |
1914 | bool amdgpu_need_backup(struct amdgpu_device *adev); | 1915 | bool amdgpu_need_backup(struct amdgpu_device *adev); |
1915 | void amdgpu_pci_config_reset(struct amdgpu_device *adev); | 1916 | void amdgpu_pci_config_reset(struct amdgpu_device *adev); |
1916 | bool amdgpu_need_post(struct amdgpu_device *adev); | 1917 | bool amdgpu_need_post(struct amdgpu_device *adev); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 046b9d5bc14d..3f63f5ca4fa7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | |||
@@ -3009,11 +3009,12 @@ error: | |||
3009 | * | 3009 | * |
3010 | * @adev: amdgpu device pointer | 3010 | * @adev: amdgpu device pointer |
3011 | * @job: which job trigger hang | 3011 | * @job: which job trigger hang |
3012 | * @force: forces reset regardless of amdgpu_gpu_recovery | ||
3012 | * | 3013 | * |
3013 | * Attempt to reset the GPU if it has hung (all asics). | 3014 | * Attempt to reset the GPU if it has hung (all asics). |
3014 | * Returns 0 for success or an error on failure. | 3015 | * Returns 0 for success or an error on failure. |
3015 | */ | 3016 | */ |
3016 | int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) | 3017 | int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job, bool force) |
3017 | { | 3018 | { |
3018 | struct drm_atomic_state *state = NULL; | 3019 | struct drm_atomic_state *state = NULL; |
3019 | uint64_t reset_flags = 0; | 3020 | uint64_t reset_flags = 0; |
@@ -3024,6 +3025,12 @@ int amdgpu_gpu_recover(struct amdgpu_device *adev, struct amdgpu_job *job) | |||
3024 | return 0; | 3025 | return 0; |
3025 | } | 3026 | } |
3026 | 3027 | ||
3028 | if (!force && (amdgpu_gpu_recovery == 0 || | ||
3029 | (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { | ||
3030 | DRM_INFO("GPU recovery disabled.\n"); | ||
3031 | return 0; | ||
3032 | } | ||
3033 | |||
3027 | dev_info(adev->dev, "GPU reset begin!\n"); | 3034 | dev_info(adev->dev, "GPU reset begin!\n"); |
3028 | 3035 | ||
3029 | mutex_lock(&adev->lock_reset); | 3036 | mutex_lock(&adev->lock_reset); |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0b039bdcf84e..b734cd668ff1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | |||
@@ -128,6 +128,7 @@ int amdgpu_param_buf_per_se = 0; | |||
128 | int amdgpu_job_hang_limit = 0; | 128 | int amdgpu_job_hang_limit = 0; |
129 | int amdgpu_lbpw = -1; | 129 | int amdgpu_lbpw = -1; |
130 | int amdgpu_compute_multipipe = -1; | 130 | int amdgpu_compute_multipipe = -1; |
131 | int amdgpu_gpu_recovery = -1; /* auto */ | ||
131 | 132 | ||
132 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); | 133 | MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes"); |
133 | module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); | 134 | module_param_named(vramlimit, amdgpu_vram_limit, int, 0600); |
@@ -280,6 +281,9 @@ module_param_named(lbpw, amdgpu_lbpw, int, 0444); | |||
280 | MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); | 281 | MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)"); |
281 | module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); | 282 | module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); |
282 | 283 | ||
284 | MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto"); | ||
285 | module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); | ||
286 | |||
283 | #ifdef CONFIG_DRM_AMDGPU_SI | 287 | #ifdef CONFIG_DRM_AMDGPU_SI |
284 | 288 | ||
285 | #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) | 289 | #if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE) |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7cb71a8e21df..d3ce12149542 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | |||
@@ -705,7 +705,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) | |||
705 | struct amdgpu_device *adev = dev->dev_private; | 705 | struct amdgpu_device *adev = dev->dev_private; |
706 | 706 | ||
707 | seq_printf(m, "gpu recover\n"); | 707 | seq_printf(m, "gpu recover\n"); |
708 | amdgpu_gpu_recover(adev, NULL); | 708 | amdgpu_gpu_recover(adev, NULL, true); |
709 | 709 | ||
710 | return 0; | 710 | return 0; |
711 | } | 711 | } |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index c340774082ea..c43643e8c8c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | |||
@@ -88,7 +88,7 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work) | |||
88 | reset_work); | 88 | reset_work); |
89 | 89 | ||
90 | if (!amdgpu_sriov_vf(adev)) | 90 | if (!amdgpu_sriov_vf(adev)) |
91 | amdgpu_gpu_recover(adev, NULL); | 91 | amdgpu_gpu_recover(adev, NULL, false); |
92 | } | 92 | } |
93 | 93 | ||
94 | /* Disable *all* interrupts */ | 94 | /* Disable *all* interrupts */ |
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 013c0a8cfb60..be8a437fad54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | |||
@@ -37,7 +37,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) | |||
37 | atomic_read(&job->ring->fence_drv.last_seq), | 37 | atomic_read(&job->ring->fence_drv.last_seq), |
38 | job->ring->fence_drv.sync_seq); | 38 | job->ring->fence_drv.sync_seq); |
39 | 39 | ||
40 | amdgpu_gpu_recover(job->adev, job); | 40 | amdgpu_gpu_recover(job->adev, job, false); |
41 | } | 41 | } |
42 | 42 | ||
43 | int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, | 43 | int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, |
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 71f56900d6fe..7ade56d59c27 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c | |||
@@ -253,7 +253,7 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work) | |||
253 | } | 253 | } |
254 | 254 | ||
255 | /* Trigger recovery due to world switch failure */ | 255 | /* Trigger recovery due to world switch failure */ |
256 | amdgpu_gpu_recover(adev, NULL); | 256 | amdgpu_gpu_recover(adev, NULL, false); |
257 | } | 257 | } |
258 | 258 | ||
259 | static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, | 259 | static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, |
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index df52824c0cd4..e05823d86cfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c | |||
@@ -521,7 +521,7 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) | |||
521 | } | 521 | } |
522 | 522 | ||
523 | /* Trigger recovery due to world switch failure */ | 523 | /* Trigger recovery due to world switch failure */ |
524 | amdgpu_gpu_recover(adev, NULL); | 524 | amdgpu_gpu_recover(adev, NULL, false); |
525 | } | 525 | } |
526 | 526 | ||
527 | static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, | 527 | static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, |