diff options
author | Lan Xiao <Lan.Xiao@amd.com> | 2018-07-11 22:32:51 -0400 |
---|---|---|
committer | Oded Gabbay <oded.gabbay@gmail.com> | 2018-07-11 22:32:51 -0400 |
commit | 58e698861255129a00765b69c0499bc0d044feb4 (patch) | |
tree | dc60a3f4a77dfcc224992c4bf0f0f9705c2fc7cd | |
parent | 2640c3facbd6e21e63c95f19588cc24913a263cd (diff) |
drm/amdkfd: fix zero reading of VMID and PASID for Hawaii
Upon VM Fault, the VMID and PASID written by HW are zeros in
Hawaii. Instead of reading from ih_ring_entry, read directly
from the registers. This workaround fixes the soft hang issues
caused by mishandled VM Fault in Hawaii.
Signed-off-by: Lan Xiao <Lan.Xiao@amd.com>
Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | 20 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | 29 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 14 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 9 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 5 |
7 files changed, 77 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index befc7c48b1cf..b4a05c510c75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c | |||
@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, | |||
145 | uint32_t page_table_base); | 145 | uint32_t page_table_base); |
146 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); | 146 | static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); |
147 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); | 147 | static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); |
148 | static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); | ||
148 | 149 | ||
149 | /* Because of REG_GET_FIELD() being used, we put this function in the | 150 | /* Because of REG_GET_FIELD() being used, we put this function in the |
150 | * asic specific file. | 151 | * asic specific file. |
@@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = { | |||
216 | .invalidate_tlbs = invalidate_tlbs, | 217 | .invalidate_tlbs = invalidate_tlbs, |
217 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, | 218 | .invalidate_tlbs_vmid = invalidate_tlbs_vmid, |
218 | .submit_ib = amdgpu_amdkfd_submit_ib, | 219 | .submit_ib = amdgpu_amdkfd_submit_ib, |
219 | .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info | 220 | .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, |
221 | .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg | ||
220 | }; | 222 | }; |
221 | 223 | ||
222 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) | 224 | struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) |
@@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid) | |||
912 | RREG32(mmVM_INVALIDATE_RESPONSE); | 914 | RREG32(mmVM_INVALIDATE_RESPONSE); |
913 | return 0; | 915 | return 0; |
914 | } | 916 | } |
917 | |||
918 | /** | ||
919 | * read_vmid_from_vmfault_reg - read vmid from register | ||
920 | * | ||
921 | * @kgd: kgd_dev pointer | ||
922 | * | ||
923 | * Return: the VMID field of VM_CONTEXT1_PROTECTION_FAULT_STATUS (CIK). | ||
924 | */ | ||
925 | static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd) | ||
926 | { | ||
927 | struct amdgpu_device *adev = get_amdgpu_device(kgd); | ||
928 | |||
929 | uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS); | ||
930 | |||
931 | return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID); | ||
932 | } | ||
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c index cc33870e7edb..5d2475d5392c 100644 --- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c | |||
@@ -25,12 +25,39 @@ | |||
25 | #include "cik_int.h" | 25 | #include "cik_int.h" |
26 | 26 | ||
27 | static bool cik_event_interrupt_isr(struct kfd_dev *dev, | 27 | static bool cik_event_interrupt_isr(struct kfd_dev *dev, |
28 | const uint32_t *ih_ring_entry) | 28 | const uint32_t *ih_ring_entry, |
29 | uint32_t *patched_ihre, | ||
30 | bool *patched_flag) | ||
29 | { | 31 | { |
30 | const struct cik_ih_ring_entry *ihre = | 32 | const struct cik_ih_ring_entry *ihre = |
31 | (const struct cik_ih_ring_entry *)ih_ring_entry; | 33 | (const struct cik_ih_ring_entry *)ih_ring_entry; |
34 | const struct kfd2kgd_calls *f2g = dev->kfd2kgd; | ||
32 | unsigned int vmid, pasid; | 35 | unsigned int vmid, pasid; |
33 | 36 | ||
37 | /* This workaround is due to HW/FW limitation on Hawaii that | ||
38 | * VMID and PASID are not written into ih_ring_entry | ||
39 | */ | ||
40 | if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT || | ||
41 | ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) && | ||
42 | dev->device_info->asic_family == CHIP_HAWAII) { | ||
43 | struct cik_ih_ring_entry *tmp_ihre = | ||
44 | (struct cik_ih_ring_entry *)patched_ihre; | ||
45 | |||
46 | *patched_flag = true; | ||
47 | *tmp_ihre = *ihre; | ||
48 | |||
49 | vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd); | ||
50 | pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid); | ||
51 | |||
52 | tmp_ihre->ring_id &= 0x000000ff; | ||
53 | tmp_ihre->ring_id |= vmid << 8; | ||
54 | tmp_ihre->ring_id |= pasid << 16; | ||
55 | |||
56 | return (pasid != 0) && | ||
57 | vmid >= dev->vm_info.first_vmid_kfd && | ||
58 | vmid <= dev->vm_info.last_vmid_kfd; | ||
59 | } | ||
60 | |||
34 | /* Only handle interrupts from KFD VMIDs */ | 61 | /* Only handle interrupts from KFD VMIDs */ |
35 | vmid = (ihre->ring_id & 0x0000ff00) >> 8; | 62 | vmid = (ihre->ring_id & 0x0000ff00) >> 8; |
36 | if (vmid < dev->vm_info.first_vmid_kfd || | 63 | if (vmid < dev->vm_info.first_vmid_kfd || |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 48c505e83217..600751175760 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c | |||
@@ -577,14 +577,24 @@ dqm_start_error: | |||
577 | /* This is called directly from KGD at ISR. */ | 577 | /* This is called directly from KGD at ISR. */ |
578 | void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) | 578 | void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) |
579 | { | 579 | { |
580 | uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE]; | ||
581 | bool is_patched = false; | ||
582 | |||
580 | if (!kfd->init_complete) | 583 | if (!kfd->init_complete) |
581 | return; | 584 | return; |
582 | 585 | ||
586 | if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) { | ||
587 | dev_err_once(kfd_device, "Ring entry too small\n"); | ||
588 | return; | ||
589 | } | ||
590 | |||
583 | spin_lock(&kfd->interrupt_lock); | 591 | spin_lock(&kfd->interrupt_lock); |
584 | 592 | ||
585 | if (kfd->interrupts_active | 593 | if (kfd->interrupts_active |
586 | && interrupt_is_wanted(kfd, ih_ring_entry) | 594 | && interrupt_is_wanted(kfd, ih_ring_entry, |
587 | && enqueue_ih_ring_entry(kfd, ih_ring_entry)) | 595 | patched_ihre, &is_patched) |
596 | && enqueue_ih_ring_entry(kfd, | ||
597 | is_patched ? patched_ihre : ih_ring_entry)) | ||
588 | queue_work(kfd->ih_wq, &kfd->interrupt_work); | 598 | queue_work(kfd->ih_wq, &kfd->interrupt_work); |
589 | 599 | ||
590 | spin_unlock(&kfd->interrupt_lock); | 600 | spin_unlock(&kfd->interrupt_lock); |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index d6b64e692760..f836897bbf58 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c | |||
@@ -26,7 +26,9 @@ | |||
26 | 26 | ||
27 | 27 | ||
28 | static bool event_interrupt_isr_v9(struct kfd_dev *dev, | 28 | static bool event_interrupt_isr_v9(struct kfd_dev *dev, |
29 | const uint32_t *ih_ring_entry) | 29 | const uint32_t *ih_ring_entry, |
30 | uint32_t *patched_ihre, | ||
31 | bool *patched_flag) | ||
30 | { | 32 | { |
31 | uint16_t source_id, client_id, pasid, vmid; | 33 | uint16_t source_id, client_id, pasid, vmid; |
32 | const uint32_t *data = ih_ring_entry; | 34 | const uint32_t *data = ih_ring_entry; |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c index db6d9336b80d..c56ac47cd318 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c | |||
@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work) | |||
151 | ih_ring_entry); | 151 | ih_ring_entry); |
152 | } | 152 | } |
153 | 153 | ||
154 | bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) | 154 | bool interrupt_is_wanted(struct kfd_dev *dev, |
155 | const uint32_t *ih_ring_entry, | ||
156 | uint32_t *patched_ihre, bool *flag) | ||
155 | { | 157 | { |
156 | /* integer and bitwise OR so there is no boolean short-circuiting */ | 158 | /* integer and bitwise OR so there is no boolean short-circuiting */ |
157 | unsigned int wanted = 0; | 159 | unsigned int wanted = 0; |
158 | 160 | ||
159 | wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, | 161 | wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, |
160 | ih_ring_entry); | 162 | ih_ring_entry, patched_ihre, flag); |
161 | 163 | ||
162 | return wanted != 0; | 164 | return wanted != 0; |
163 | } | 165 | } |
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 91a3368421b1..cd5121d925e0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h | |||
@@ -180,9 +180,10 @@ enum cache_policy { | |||
180 | 180 | ||
181 | struct kfd_event_interrupt_class { | 181 | struct kfd_event_interrupt_class { |
182 | bool (*interrupt_isr)(struct kfd_dev *dev, | 182 | bool (*interrupt_isr)(struct kfd_dev *dev, |
183 | const uint32_t *ih_ring_entry); | 183 | const uint32_t *ih_ring_entry, uint32_t *patched_ihre, |
184 | bool *patched_flag); | ||
184 | void (*interrupt_wq)(struct kfd_dev *dev, | 185 | void (*interrupt_wq)(struct kfd_dev *dev, |
185 | const uint32_t *ih_ring_entry); | 186 | const uint32_t *ih_ring_entry); |
186 | }; | 187 | }; |
187 | 188 | ||
188 | struct kfd_device_info { | 189 | struct kfd_device_info { |
@@ -806,7 +807,9 @@ int kfd_interrupt_init(struct kfd_dev *dev); | |||
806 | void kfd_interrupt_exit(struct kfd_dev *dev); | 807 | void kfd_interrupt_exit(struct kfd_dev *dev); |
807 | void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); | 808 | void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); |
808 | bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); | 809 | bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); |
809 | bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); | 810 | bool interrupt_is_wanted(struct kfd_dev *dev, |
811 | const uint32_t *ih_ring_entry, | ||
812 | uint32_t *patched_ihre, bool *flag); | ||
810 | 813 | ||
811 | /* Power Management */ | 814 | /* Power Management */ |
812 | void kgd2kfd_suspend(struct kfd_dev *kfd); | 815 | void kgd2kfd_suspend(struct kfd_dev *kfd); |
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h index 28b11d105288..76a30cbeee19 100644 --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h | |||
@@ -276,6 +276,10 @@ struct tile_config { | |||
276 | * faults. On GFXv9 VM fault information is fully contained in the IH | 276 | * faults. On GFXv9 VM fault information is fully contained in the IH |
277 | * packet and this function is not needed. | 277 | * packet and this function is not needed. |
278 | * | 278 | * |
279 | * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the | ||
280 | * IH ring entry. This function allows the KFD ISR to get the VMID | ||
281 | * from the fault status register as early as possible. | ||
282 | * | ||
279 | * This structure contains function pointers to services that the kgd driver | 283 | * This structure contains function pointers to services that the kgd driver |
280 | * provides to amdkfd driver. | 284 | * provides to amdkfd driver. |
281 | * | 285 | * |
@@ -394,6 +398,7 @@ struct kfd2kgd_calls { | |||
394 | 398 | ||
395 | int (*get_vm_fault_info)(struct kgd_dev *kgd, | 399 | int (*get_vm_fault_info)(struct kgd_dev *kgd, |
396 | struct kfd_vm_fault_info *info); | 400 | struct kfd_vm_fault_info *info); |
401 | uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd); | ||
397 | }; | 402 | }; |
398 | 403 | ||
399 | /** | 404 | /** |