aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
diff options
context:
space:
mode:
authorFelix Kuehling <Felix.Kuehling@amd.com>2017-09-21 16:26:41 -0400
committerAlex Deucher <alexander.deucher@amd.com>2017-09-28 16:03:30 -0400
commitc98171ccf6580407d07a3b5dc8188ce9e1f4f7ca (patch)
tree191e13e83451ea87c6e4fdc758425b046a0c4bf8 /drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
parent1bab0fc01b84c1aa8a65a1f1de885e1faab48264 (diff)
drm/amdgpu: Handle GPUVM fault storms
When many wavefronts cause VM faults at the same time, it can overwhelm the interrupt handler and cause IH ring overflows before the driver can notify or kill the faulting application. As a workaround I'm introducing limited per-VM fault credit. After that number of VM faults have occurred, further VM faults are filtered out at the prescreen stage of processing.

This depends on the PASID in the interrupt packet, so it currently only works for KFD contexts.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c31
1 files changed, 31 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 8fcc743dfa86..c91d5c7a273d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2682,6 +2682,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
2682 } 2682 }
2683 2683
2684 INIT_KFIFO(vm->faults); 2684 INIT_KFIFO(vm->faults);
2685 vm->fault_credit = 16;
2685 2686
2686 return 0; 2687 return 0;
2687 2688
@@ -2776,6 +2777,36 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
2776} 2777}
2777 2778
2778/** 2779/**
2780 * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
2781 *
2782 * @adev: amdgpu_device pointer
2783 * @pasid: PASID to identify the VM
2784 *
2785 * This function is expected to be called in interrupt context. Returns
2786 * true if there was fault credit, false otherwise
2787 */
2788bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
2789 unsigned int pasid)
2790{
2791 struct amdgpu_vm *vm;
2792
2793 spin_lock(&adev->vm_manager.pasid_lock);
2794 vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
2795 spin_unlock(&adev->vm_manager.pasid_lock);
2796 if (!vm)
2797 /* VM not found, can't track fault credit */
2798 return true;
2799
2800 /* No lock needed. only accessed by IRQ handler */
2801 if (!vm->fault_credit)
2802 /* Too many faults in this VM */
2803 return false;
2804
2805 vm->fault_credit--;
2806 return true;
2807}
2808
2809/**
2779 * amdgpu_vm_manager_init - init the VM manager 2810 * amdgpu_vm_manager_init - init the VM manager
2780 * 2811 *
2781 * @adev: amdgpu_device pointer 2812 * @adev: amdgpu_device pointer