path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
author	Monk Liu <Monk.Liu@amd.com>	2017-12-25 02:59:30 -0500
committer	Alex Deucher <alexander.deucher@amd.com>	2018-03-01 12:04:17 -0500
commit	a22144a58f784265fe8140724a7390443f63ef53 (patch)
tree	394a81a535d1bff9cbe7fb3baab5cd1994d06762 /drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
parent	c41d1cf62d3615294c1dee291b05ee3220a4de6c (diff)
drm/amdgpu: try again kiq access if not in IRQ (v4)

Sometimes the GPU is switched to other VFs and won't switch back soon, so the KIQ register access will not signal within a short period. Instead of busy waiting a long time (MAX_KIQ_REG_WAIT) and returning a timeout, we can sleep 5ms and try again later (in non-IRQ context). And since the wait in kiq_rreg/kiq_wreg is a busy wait, MAX_KIQ_REG_WAIT shouldn't be set to a long time; setting it to 10ms is more appropriate.

If the GPU is already in the reset state, don't retry the KIQ register access, otherwise it would always hang because the KIQ is usually already dead.

v2: replace schedule() with msleep() for the wait
v3: use a while loop for the repeating wait,
    use macros for the sleep period,
    add more description for it
v4: drop unused variable

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Pixel Ding <Pixel.Ding@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
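For context, below is a minimal user-space sketch of the retry pattern this patch introduces: poll the fence once, bail out immediately when sleeping is not allowed (GPU reset or IRQ context), otherwise sleep for MAX_KIQ_REG_BAILOUT_INTERVAL and poll again up to MAX_KIQ_REG_TRY times. The helpers poll_fence(), kiq_wait() and the main() driver are hypothetical stand-ins used only for illustration; the real kernel code uses amdgpu_fence_wait_polling() and msleep(), as shown in the diff below.

/*
 * Illustrative user-space sketch only -- not the kernel implementation.
 * poll_fence() is a hypothetical stand-in for amdgpu_fence_wait_polling();
 * usleep() stands in for the kernel's msleep().
 */
#include <stdio.h>
#include <unistd.h>

#define MAX_KIQ_REG_WAIT		5000	/* poll window, usecs (5ms) */
#define MAX_KIQ_REG_BAILOUT_INTERVAL	5	/* sleep between retries, msecs */
#define MAX_KIQ_REG_TRY			20	/* give up after this many retries */

/* Pretend fence: returns 1 once "signaled", 0 while still pending. */
static long poll_fence(unsigned int seq, long timeout_us)
{
	static int polls;
	(void)seq;
	(void)timeout_us;
	return ++polls >= 3;	/* signal on the third poll for the demo */
}

static int kiq_wait(unsigned int seq, int no_sleep_allowed)
{
	long r, cnt = 0;

	r = poll_fence(seq, MAX_KIQ_REG_WAIT);

	/* In GPU reset or IRQ context we must not sleep, so fail fast. */
	if (r < 1 && no_sleep_allowed)
		return -1;

	/* Otherwise back off briefly and poll again, bounded by MAX_KIQ_REG_TRY. */
	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
		usleep(MAX_KIQ_REG_BAILOUT_INTERVAL * 1000);
		r = poll_fence(seq, MAX_KIQ_REG_WAIT);
	}

	return (cnt > MAX_KIQ_REG_TRY) ? -1 : 0;
}

int main(void)
{
	printf("kiq_wait: %s\n", kiq_wait(1, 0) ? "timed out" : "signaled");
	return 0;
}

The key property is that the worst-case delay is bounded (roughly MAX_KIQ_REG_TRY * (MAX_KIQ_REG_WAIT + MAX_KIQ_REG_BAILOUT_INTERVAL), about 200ms), while contexts that cannot sleep still return immediately.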
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c')
-rw-r--r--	drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c	64
1 file changed, 53 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index b832651d2137..42c140155b70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -22,7 +22,9 @@
  */
 
 #include "amdgpu.h"
-#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
+#define MAX_KIQ_REG_WAIT	5000 /* in usecs, 5ms */
+#define MAX_KIQ_REG_BAILOUT_INTERVAL	5 /* in msecs, 5ms */
+#define MAX_KIQ_REG_TRY 20
 
 uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev)
 {
@@ -137,9 +139,9 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 
 uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
-	signed long r;
+	signed long r, cnt = 0;
 	unsigned long flags;
-	uint32_t val, seq;
+	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *ring = &kiq->ring;
 
@@ -153,18 +155,36 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	if (r < 1) {
-		DRM_ERROR("wait for kiq fence error: %ld\n", r);
-		return ~0;
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which cause
+	 * gpu_recover() hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq_read;
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
 	}
-	val = adev->wb.wb[adev->virt.reg_val_offs];
 
-	return val;
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_read;
+
+	return adev->wb.wb[adev->virt.reg_val_offs];
+
+failed_kiq_read:
+	pr_err("failed to read reg:%x\n", reg);
+	return ~0;
 }
 
 void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 {
-	signed long r;
+	signed long r, cnt = 0;
 	unsigned long flags;
 	uint32_t seq;
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
@@ -180,8 +200,30 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 	spin_unlock_irqrestore(&kiq->ring_lock, flags);
 
 	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
-	if (r < 1)
-		DRM_ERROR("wait for kiq fence error: %ld\n", r);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which cause
+	 * gpu_recover() hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (adev->in_gpu_reset || in_interrupt()))
+		goto failed_kiq_write;
+
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_write;
+
+	return;
+
+failed_kiq_write:
+	pr_err("failed to write reg:%x\n", reg);
 }
 
 /**