summary | refs | log | tree | commit | diff | stats
path: root/drivers/misc
diff options
context:
space:
mode:
author: Ian Munsie <imunsie@au1.ibm.com> 2014-11-14 01:37:50 -0500
committer: Michael Ellerman <mpe@ellerman.id.au> 2014-11-17 20:58:38 -0500
commit: bc78b05bb412fad135715551fc536ca511a3cff2 (patch)
tree: bf855a2e5fab58f19e678971296b72a195c2eabb /drivers/misc
parent: 16b1d26e77b142546e2b9b6dc3b5aa5c44ae3b77 (diff)
cxl: Return error to PSL if IRQ demultiplexing fails & print clearer warning
If an AFU has a hardware bug that causes it to acknowledge a context terminate or remove while that context has outstanding transactions, it is possible for the kernel to receive an interrupt for that context after we have removed it from the context list. In that case the kernel cannot demultiplex the interrupt (or worse - if we have already reallocated the process handle, we could mis-attribute it to the new context); until now it printed a big scary warning and did not acknowledge the interrupt, which would effectively halt further translation fault processing on the PSL. This patch makes the warning clearer about the likely cause of the issue (i.e. a hardware bug) to make it obvious to future AFU designers what needs to be fixed. It also prints out the process handle, which can then be matched up with hardware and software traces for debugging. Finally, it acknowledges the interrupt to the PSL with either an address error or an acknowledge, so that the PSL can continue with other translations. Signed-off-by: Ian Munsie <imunsie@au1.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'drivers/misc')
-rw-r--r-- drivers/misc/cxl/cxl.h 2
-rw-r--r-- drivers/misc/cxl/irq.c 46
-rw-r--r-- drivers/misc/cxl/native.c 14
3 files changed, 37 insertions, 25 deletions
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index 3d2b8677ec8a..64a4aa3a5c5d 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -612,7 +612,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed,
612 u64 amr); 612 u64 amr);
613int cxl_detach_process(struct cxl_context *ctx); 613int cxl_detach_process(struct cxl_context *ctx);
614 614
615int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info); 615int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info);
616int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); 616int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask);
617 617
618int cxl_check_error(struct cxl_afu *afu); 618int cxl_check_error(struct cxl_afu *afu);
diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c
index 336020c8e1af..35fcb3d43dc0 100644
--- a/drivers/misc/cxl/irq.c
+++ b/drivers/misc/cxl/irq.c
@@ -92,20 +92,13 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da
92 return IRQ_HANDLED; 92 return IRQ_HANDLED;
93} 93}
94 94
95static irqreturn_t cxl_irq(int irq, void *data) 95static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info)
96{ 96{
97 struct cxl_context *ctx = data; 97 struct cxl_context *ctx = data;
98 struct cxl_irq_info irq_info;
99 u64 dsisr, dar; 98 u64 dsisr, dar;
100 int result;
101 99
102 if ((result = cxl_get_irq(ctx, &irq_info))) { 100 dsisr = irq_info->dsisr;
103 WARN(1, "Unable to get CXL IRQ Info: %i\n", result); 101 dar = irq_info->dar;
104 return IRQ_HANDLED;
105 }
106
107 dsisr = irq_info.dsisr;
108 dar = irq_info.dar;
109 102
110 pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); 103 pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar);
111 104
@@ -149,9 +142,9 @@ static irqreturn_t cxl_irq(int irq, void *data)
149 if (dsisr & CXL_PSL_DSISR_An_UR) 142 if (dsisr & CXL_PSL_DSISR_An_UR)
150 pr_devel("CXL interrupt: AURP PTE not found\n"); 143 pr_devel("CXL interrupt: AURP PTE not found\n");
151 if (dsisr & CXL_PSL_DSISR_An_PE) 144 if (dsisr & CXL_PSL_DSISR_An_PE)
152 return handle_psl_slice_error(ctx, dsisr, irq_info.errstat); 145 return handle_psl_slice_error(ctx, dsisr, irq_info->errstat);
153 if (dsisr & CXL_PSL_DSISR_An_AE) { 146 if (dsisr & CXL_PSL_DSISR_An_AE) {
154 pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info.afu_err); 147 pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err);
155 148
156 if (ctx->pending_afu_err) { 149 if (ctx->pending_afu_err) {
157 /* 150 /*
@@ -163,10 +156,10 @@ static irqreturn_t cxl_irq(int irq, void *data)
163 */ 156 */
164 dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " 157 dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error "
165 "undelivered to pe %i: %.llx\n", 158 "undelivered to pe %i: %.llx\n",
166 ctx->pe, irq_info.afu_err); 159 ctx->pe, irq_info->afu_err);
167 } else { 160 } else {
168 spin_lock(&ctx->lock); 161 spin_lock(&ctx->lock);
169 ctx->afu_err = irq_info.afu_err; 162 ctx->afu_err = irq_info->afu_err;
170 ctx->pending_afu_err = 1; 163 ctx->pending_afu_err = 1;
171 spin_unlock(&ctx->lock); 164 spin_unlock(&ctx->lock);
172 165
@@ -182,24 +175,43 @@ static irqreturn_t cxl_irq(int irq, void *data)
182 return IRQ_HANDLED; 175 return IRQ_HANDLED;
183} 176}
184 177
178static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info)
179{
180 if (irq_info->dsisr & CXL_PSL_DSISR_TRANS)
181 cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE);
182 else
183 cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A);
184
185 return IRQ_HANDLED;
186}
187
185static irqreturn_t cxl_irq_multiplexed(int irq, void *data) 188static irqreturn_t cxl_irq_multiplexed(int irq, void *data)
186{ 189{
187 struct cxl_afu *afu = data; 190 struct cxl_afu *afu = data;
188 struct cxl_context *ctx; 191 struct cxl_context *ctx;
192 struct cxl_irq_info irq_info;
189 int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; 193 int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff;
190 int ret; 194 int ret;
191 195
196 if ((ret = cxl_get_irq(afu, &irq_info))) {
197 WARN(1, "Unable to get CXL IRQ Info: %i\n", ret);
198 return fail_psl_irq(afu, &irq_info);
199 }
200
192 rcu_read_lock(); 201 rcu_read_lock();
193 ctx = idr_find(&afu->contexts_idr, ph); 202 ctx = idr_find(&afu->contexts_idr, ph);
194 if (ctx) { 203 if (ctx) {
195 ret = cxl_irq(irq, ctx); 204 ret = cxl_irq(irq, ctx, &irq_info);
196 rcu_read_unlock(); 205 rcu_read_unlock();
197 return ret; 206 return ret;
198 } 207 }
199 rcu_read_unlock(); 208 rcu_read_unlock();
200 209
201 WARN(1, "Unable to demultiplex CXL PSL IRQ\n"); 210 WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR"
202 return IRQ_HANDLED; 211 " %.16llx\n(Possible AFU HW issue - was a term/remove acked"
212 " with outstanding transactions?)\n", ph, irq_info.dsisr,
213 irq_info.dar);
214 return fail_psl_irq(afu, &irq_info);
203} 215}
204 216
205static irqreturn_t cxl_irq_afu(int irq, void *data) 217static irqreturn_t cxl_irq_afu(int irq, void *data)
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index d47532e8f4f1..9a5a442269a8 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -637,18 +637,18 @@ int cxl_detach_process(struct cxl_context *ctx)
637 return detach_process_native_afu_directed(ctx); 637 return detach_process_native_afu_directed(ctx);
638} 638}
639 639
640int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info) 640int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info)
641{ 641{
642 u64 pidtid; 642 u64 pidtid;
643 643
644 info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); 644 info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An);
645 info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An); 645 info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An);
646 info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An); 646 info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An);
647 pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An); 647 pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An);
648 info->pid = pidtid >> 32; 648 info->pid = pidtid >> 32;
649 info->tid = pidtid & 0xffffffff; 649 info->tid = pidtid & 0xffffffff;
650 info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An); 650 info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An);
651 info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An); 651 info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An);
652 652
653 return 0; 653 return 0;
654} 654}