aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLan Xiao <Lan.Xiao@amd.com>2018-07-11 22:32:51 -0400
committerOded Gabbay <oded.gabbay@gmail.com>2018-07-11 22:32:51 -0400
commit58e698861255129a00765b69c0499bc0d044feb4 (patch)
treedc60a3f4a77dfcc224992c4bf0f0f9705c2fc7cd
parent2640c3facbd6e21e63c95f19588cc24913a263cd (diff)
drm/amdkfd: fix zero reading of VMID and PASID for Hawaii
Upon a VM fault, the VMID and PASID written by HW are zero on Hawaii. Instead of reading them from ih_ring_entry, read them directly from the registers. This workaround fixes the soft hang issues caused by mishandled VM faults on Hawaii. Signed-off-by: Lan Xiao <Lan.Xiao@amd.com> Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com> Acked-by: Christian König <christian.koenig@amd.com> Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c20
-rw-r--r--drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c29
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_device.c14
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c4
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c6
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_priv.h9
-rw-r--r--drivers/gpu/drm/amd/include/kgd_kfd_interface.h5
7 files changed, 77 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index befc7c48b1cf..b4a05c510c75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
145 uint32_t page_table_base); 145 uint32_t page_table_base);
146static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); 146static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
147static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); 147static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
148static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
148 149
149/* Because of REG_GET_FIELD() being used, we put this function in the 150/* Because of REG_GET_FIELD() being used, we put this function in the
150 * asic specific file. 151 * asic specific file.
@@ -216,7 +217,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
216 .invalidate_tlbs = invalidate_tlbs, 217 .invalidate_tlbs = invalidate_tlbs,
217 .invalidate_tlbs_vmid = invalidate_tlbs_vmid, 218 .invalidate_tlbs_vmid = invalidate_tlbs_vmid,
218 .submit_ib = amdgpu_amdkfd_submit_ib, 219 .submit_ib = amdgpu_amdkfd_submit_ib,
219 .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info 220 .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
221 .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg
220}; 222};
221 223
222struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) 224struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -912,3 +914,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
912 RREG32(mmVM_INVALIDATE_RESPONSE); 914 RREG32(mmVM_INVALIDATE_RESPONSE);
913 return 0; 915 return 0;
914} 916}
917
918 /**
919 * read_vmid_from_vmfault_reg - read vmid from register
920 *
921 * @kgd: kgd device pointer
922 *
923 * Returns the VMID field of the VM context1 protection fault status register (CIK).
924 */
925static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
926{
927 struct amdgpu_device *adev = get_amdgpu_device(kgd);
928
929 uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
930
931 return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
932}
diff --git a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
index cc33870e7edb..5d2475d5392c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c
@@ -25,12 +25,39 @@
25#include "cik_int.h" 25#include "cik_int.h"
26 26
27static bool cik_event_interrupt_isr(struct kfd_dev *dev, 27static bool cik_event_interrupt_isr(struct kfd_dev *dev,
28 const uint32_t *ih_ring_entry) 28 const uint32_t *ih_ring_entry,
29 uint32_t *patched_ihre,
30 bool *patched_flag)
29{ 31{
30 const struct cik_ih_ring_entry *ihre = 32 const struct cik_ih_ring_entry *ihre =
31 (const struct cik_ih_ring_entry *)ih_ring_entry; 33 (const struct cik_ih_ring_entry *)ih_ring_entry;
34 const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
32 unsigned int vmid, pasid; 35 unsigned int vmid, pasid;
33 36
37 /* This workaround is due to HW/FW limitation on Hawaii that
38 * VMID and PASID are not written into ih_ring_entry
39 */
40 if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
41 ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
42 dev->device_info->asic_family == CHIP_HAWAII) {
43 struct cik_ih_ring_entry *tmp_ihre =
44 (struct cik_ih_ring_entry *)patched_ihre;
45
46 *patched_flag = true;
47 *tmp_ihre = *ihre;
48
49 vmid = f2g->read_vmid_from_vmfault_reg(dev->kgd);
50 pasid = f2g->get_atc_vmid_pasid_mapping_pasid(dev->kgd, vmid);
51
52 tmp_ihre->ring_id &= 0x000000ff;
53 tmp_ihre->ring_id |= vmid << 8;
54 tmp_ihre->ring_id |= pasid << 16;
55
56 return (pasid != 0) &&
57 vmid >= dev->vm_info.first_vmid_kfd &&
58 vmid <= dev->vm_info.last_vmid_kfd;
59 }
60
34 /* Only handle interrupts from KFD VMIDs */ 61 /* Only handle interrupts from KFD VMIDs */
35 vmid = (ihre->ring_id & 0x0000ff00) >> 8; 62 vmid = (ihre->ring_id & 0x0000ff00) >> 8;
36 if (vmid < dev->vm_info.first_vmid_kfd || 63 if (vmid < dev->vm_info.first_vmid_kfd ||
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 48c505e83217..600751175760 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -577,14 +577,24 @@ dqm_start_error:
577/* This is called directly from KGD at ISR. */ 577/* This is called directly from KGD at ISR. */
578void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry) 578void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
579{ 579{
580 uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
581 bool is_patched = false;
582
580 if (!kfd->init_complete) 583 if (!kfd->init_complete)
581 return; 584 return;
582 585
586 if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
587 dev_err_once(kfd_device, "Ring entry too small\n");
588 return;
589 }
590
583 spin_lock(&kfd->interrupt_lock); 591 spin_lock(&kfd->interrupt_lock);
584 592
585 if (kfd->interrupts_active 593 if (kfd->interrupts_active
586 && interrupt_is_wanted(kfd, ih_ring_entry) 594 && interrupt_is_wanted(kfd, ih_ring_entry,
587 && enqueue_ih_ring_entry(kfd, ih_ring_entry)) 595 patched_ihre, &is_patched)
596 && enqueue_ih_ring_entry(kfd,
597 is_patched ? patched_ihre : ih_ring_entry))
588 queue_work(kfd->ih_wq, &kfd->interrupt_work); 598 queue_work(kfd->ih_wq, &kfd->interrupt_work);
589 599
590 spin_unlock(&kfd->interrupt_lock); 600 spin_unlock(&kfd->interrupt_lock);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
index d6b64e692760..f836897bbf58 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c
@@ -26,7 +26,9 @@
26 26
27 27
28static bool event_interrupt_isr_v9(struct kfd_dev *dev, 28static bool event_interrupt_isr_v9(struct kfd_dev *dev,
29 const uint32_t *ih_ring_entry) 29 const uint32_t *ih_ring_entry,
30 uint32_t *patched_ihre,
31 bool *patched_flag)
30{ 32{
31 uint16_t source_id, client_id, pasid, vmid; 33 uint16_t source_id, client_id, pasid, vmid;
32 const uint32_t *data = ih_ring_entry; 34 const uint32_t *data = ih_ring_entry;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
index db6d9336b80d..c56ac47cd318 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_interrupt.c
@@ -151,13 +151,15 @@ static void interrupt_wq(struct work_struct *work)
151 ih_ring_entry); 151 ih_ring_entry);
152} 152}
153 153
154bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry) 154bool interrupt_is_wanted(struct kfd_dev *dev,
155 const uint32_t *ih_ring_entry,
156 uint32_t *patched_ihre, bool *flag)
155{ 157{
156 /* integer and bitwise OR so there is no boolean short-circuiting */ 158 /* integer and bitwise OR so there is no boolean short-circuiting */
157 unsigned int wanted = 0; 159 unsigned int wanted = 0;
158 160
159 wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev, 161 wanted |= dev->device_info->event_interrupt_class->interrupt_isr(dev,
160 ih_ring_entry); 162 ih_ring_entry, patched_ihre, flag);
161 163
162 return wanted != 0; 164 return wanted != 0;
163} 165}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 91a3368421b1..cd5121d925e0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -180,9 +180,10 @@ enum cache_policy {
180 180
181struct kfd_event_interrupt_class { 181struct kfd_event_interrupt_class {
182 bool (*interrupt_isr)(struct kfd_dev *dev, 182 bool (*interrupt_isr)(struct kfd_dev *dev,
183 const uint32_t *ih_ring_entry); 183 const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
184 bool *patched_flag);
184 void (*interrupt_wq)(struct kfd_dev *dev, 185 void (*interrupt_wq)(struct kfd_dev *dev,
185 const uint32_t *ih_ring_entry); 186 const uint32_t *ih_ring_entry);
186}; 187};
187 188
188struct kfd_device_info { 189struct kfd_device_info {
@@ -806,7 +807,9 @@ int kfd_interrupt_init(struct kfd_dev *dev);
806void kfd_interrupt_exit(struct kfd_dev *dev); 807void kfd_interrupt_exit(struct kfd_dev *dev);
807void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 808void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
808bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry); 809bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
809bool interrupt_is_wanted(struct kfd_dev *dev, const uint32_t *ih_ring_entry); 810bool interrupt_is_wanted(struct kfd_dev *dev,
811 const uint32_t *ih_ring_entry,
812 uint32_t *patched_ihre, bool *flag);
810 813
811/* Power Management */ 814/* Power Management */
812void kgd2kfd_suspend(struct kfd_dev *kfd); 815void kgd2kfd_suspend(struct kfd_dev *kfd);
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 28b11d105288..76a30cbeee19 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -276,6 +276,10 @@ struct tile_config {
276 * faults. On GFXv9 VM fault information is fully contained in the IH 276 * faults. On GFXv9 VM fault information is fully contained in the IH
277 * packet and this function is not needed. 277 * packet and this function is not needed.
278 * 278 *
279 * @read_vmid_from_vmfault_reg: On Hawaii the VMID is not set in the
280 * IH ring entry. This function allows the KFD ISR to get the VMID
281 * from the fault status register as early as possible.
282 *
279 * This structure contains function pointers to services that the kgd driver 283 * This structure contains function pointers to services that the kgd driver
280 * provides to amdkfd driver. 284 * provides to amdkfd driver.
281 * 285 *
@@ -394,6 +398,7 @@ struct kfd2kgd_calls {
394 398
395 int (*get_vm_fault_info)(struct kgd_dev *kgd, 399 int (*get_vm_fault_info)(struct kgd_dev *kgd,
396 struct kfd_vm_fault_info *info); 400 struct kfd_vm_fault_info *info);
401 uint32_t (*read_vmid_from_vmfault_reg)(struct kgd_dev *kgd);
397}; 402};
398 403
399/** 404/**