aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2017-08-26 02:43:06 -0400
committerAlex Deucher <alexander.deucher@amd.com>2017-09-26 14:53:20 -0400
commita2f14820e3493145c25095873d4a510a1b25efdc (patch)
tree801651223be96004fc4f39ef658c3bd282311ca2 /drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
parent5d86b2c391965cbcb295e8fa795276977b2a416e (diff)
drm/amdgpu: Track pending retry faults in IH and VM (v2)
IH tracks pending retry faults in a hash table for fast lookup in interrupt context. Each VM has a short FIFO of pending VM faults for processing in a bottom half. The IH prescreening stage adds retry faults and filters out repeated retry interrupts to minimize the impact of interrupt storms. It's the VM's responsibility to remove pending faults once they are handled. For now this is only done when the VM is destroyed. v2: - Made the hash table smaller and the FIFO longer. I never want the FIFO to fill up, because that would make prescreen take longer. 128 pending page faults should be enough to keep migrations busy. Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> (v1) Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c76
1 files changed, 76 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index c834a40cfad6..f5f27e4f0f7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -196,3 +196,79 @@ restart_ih:
196 196
197 return IRQ_HANDLED; 197 return IRQ_HANDLED;
198} 198}
199
200/**
201 * amdgpu_ih_add_fault - Add a page fault record
202 *
203 * @adev: amdgpu device pointer
204 * @key: 64-bit encoding of PASID and address
205 *
206 * This should be called when a retry page fault interrupt is
207 * received. If this is a new page fault, it will be added to a hash
208 * table. The return value indicates whether this is a new fault, or
209 * a fault that was already known and is already being handled.
210 *
211 * If there are too many pending page faults, this will fail. Retry
212 * interrupts should be ignored in this case until there is enough
213 * free space.
214 *
215 * Returns 0 if the fault was added, 1 if the fault was already known,
216 * -ENOSPC if there are too many pending faults.
217 */
218int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
219{
220 unsigned long flags;
221 int r = -ENOSPC;
222
223 if (WARN_ON_ONCE(!adev->irq.ih.faults))
224 /* Should be allocated in <IP>_ih_sw_init on GPUs that
225 * support retry faults and require retry filtering.
226 */
227 return r;
228
229 spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
230
231 /* Only let the hash table fill up to 50% for best performance */
232 if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
233 goto unlock_out;
234
235 r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
236 if (!r)
237 adev->irq.ih.faults->count++;
238
239 /* chash_table_copy_in should never fail unless we're losing count */
240 WARN_ON_ONCE(r < 0);
241
242unlock_out:
243 spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
244 return r;
245}
246
247/**
248 * amdgpu_ih_clear_fault - Remove a page fault record
249 *
250 * @adev: amdgpu device pointer
251 * @key: 64-bit encoding of PASID and address
252 *
253 * This should be called when a page fault has been handled. Any
254 * future interrupt with this key will be processed as a new
255 * page fault.
256 */
257void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
258{
259 unsigned long flags;
260 int r;
261
262 if (!adev->irq.ih.faults)
263 return;
264
265 spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
266
267 r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
268 if (!WARN_ON_ONCE(r < 0)) {
269 adev->irq.ih.faults->count--;
270 WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
271 }
272
273 spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
274}