author    Ian Munsie <imunsie@au1.ibm.com>    2014-12-08 03:17:55 -0500
committer Michael Ellerman <mpe@ellerman.id.au>    2014-12-11 21:06:47 -0500
commit ee41d11d53c8fc4968f0816504651541d606cf40
tree   c3680cf3bdcfadcf5c23088fb8b3a75a4d48b6ef
parent 7c5c92ed56d932b2c19c3f8aea86369509407d33
cxl: Change contexts_lock to a mutex to fix sleep while atomic bug
We had a known sleep while atomic bug if a CXL device was forcefully
unbound while it was in use. This could occur as a result of EEH, or
manually induced with something like this while the device was in use:

echo 0000:01:00.0 > /sys/bus/pci/drivers/cxl-pci/unbind

The issue was that in this code path we iterated over each context and
forcefully detached it with the contexts_lock spin lock held, however
the detach also needed to take the spu_mutex, and call schedule.

This patch changes the contexts_lock to a mutex so that we are not in
atomic context while doing the detach, thereby avoiding the sleep while
atomic.

Also delete the related TODO comment, which suggested an alternate
solution which turned out to not be workable.

Cc: stable@vger.kernel.org
Signed-off-by: Ian Munsie <imunsie@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
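To illustrate the class of bug being fixed, here is a minimal,
self-contained sketch (the demo_* names are hypothetical, not the cxl
code itself). Holding a spinlock puts the task in atomic context, so
taking a mutex (which can sleep) inside the critical section triggers
the "sleeping function called from invalid context" splat when
CONFIG_DEBUG_ATOMIC_SLEEP is enabled; switching the outer lock to a
mutex keeps the whole detach path in process context:

#include <linux/spinlock.h>
#include <linux/mutex.h>

static DEFINE_SPINLOCK(demo_contexts_spinlock); /* the old contexts_lock */
static DEFINE_MUTEX(demo_contexts_mutex);       /* the new contexts_lock */
static DEFINE_MUTEX(demo_inner_mutex); /* stands in for the mutex the detach path takes */

static void demo_detach_one(void)
{
	mutex_lock(&demo_inner_mutex);  /* may sleep */
	/* ... tear down one context, possibly blocking ... */
	mutex_unlock(&demo_inner_mutex);
}

/* Before: iterating under a spinlock is atomic context, so the
 * sleeping mutex_lock() inside demo_detach_one() splats. */
static void demo_detach_all_buggy(void)
{
	spin_lock(&demo_contexts_spinlock);
	demo_detach_one();  /* BUG: sleeping function called from invalid context */
	spin_unlock(&demo_contexts_spinlock);
}

/* After: a mutex keeps us in process context, where sleeping is legal. */
static void demo_detach_all_fixed(void)
{
	mutex_lock(&demo_contexts_mutex);
	demo_detach_one();
	mutex_unlock(&demo_contexts_mutex);
}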
-rw-r--r--  drivers/misc/cxl/context.c  15
-rw-r--r--  drivers/misc/cxl/cxl.h       2
-rw-r--r--  drivers/misc/cxl/native.c    7
-rw-r--r--  drivers/misc/cxl/pci.c       2
-rw-r--r--  drivers/misc/cxl/sysfs.c    10
5 files changed, 15 insertions, 21 deletions
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index cca472109135..4aa31a3fb448 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -82,12 +82,12 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
 	 * Allocating IDR! We better make sure everything's setup that
 	 * dereferences from it.
 	 */
+	mutex_lock(&afu->contexts_lock);
 	idr_preload(GFP_KERNEL);
-	spin_lock(&afu->contexts_lock);
 	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
 		      ctx->afu->num_procs, GFP_NOWAIT);
-	spin_unlock(&afu->contexts_lock);
 	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
 	if (i < 0)
 		return i;
 
@@ -168,21 +168,22 @@ void cxl_context_detach_all(struct cxl_afu *afu)
 	struct cxl_context *ctx;
 	int tmp;
 
-	rcu_read_lock();
-	idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
 		/*
 		 * Anything done in here needs to be setup before the IDR is
 		 * created and torn down after the IDR removed
 		 */
 		__detach_context(ctx);
-	rcu_read_unlock();
+	}
+	mutex_unlock(&afu->contexts_lock);
 }
 
 void cxl_context_free(struct cxl_context *ctx)
 {
-	spin_lock(&ctx->afu->contexts_lock);
+	mutex_lock(&ctx->afu->contexts_lock);
 	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-	spin_unlock(&ctx->afu->contexts_lock);
+	mutex_unlock(&ctx->afu->contexts_lock);
 	synchronize_rcu();
 
 	free_page((u64)ctx->sstp);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b5b6bda44a00..7c05239359df 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -351,7 +351,7 @@ struct cxl_afu {
 	struct device *chardev_s, *chardev_m, *chardev_d;
 	struct idr contexts_idr;
 	struct dentry *debugfs;
-	spinlock_t contexts_lock;
+	struct mutex contexts_lock;
 	struct mutex spa_mutex;
 	spinlock_t afu_cntl_lock;
 
diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 9a5a442269a8..1001cf49af94 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c
@@ -610,13 +610,6 @@ static inline int detach_process_native_dedicated(struct cxl_context *ctx)
 	return 0;
 }
 
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) . Terminate
- * & remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
 {
 	if (!ctx->pe_inserted)
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 10c98ab7f46e..0f2cc9f8b4db 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -502,7 +502,7 @@ static struct cxl_afu *cxl_alloc_afu(struct cxl *adapter, int slice)
 	afu->dev.release = cxl_release_afu;
 	afu->slice = slice;
 	idr_init(&afu->contexts_idr);
-	spin_lock_init(&afu->contexts_lock);
+	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
 	mutex_init(&afu->spa_mutex);
 
diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index ce7ec06d87d1..461bdbd5d483 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c
@@ -121,7 +121,7 @@ static ssize_t reset_store_afu(struct device *device,
 	int rc;
 
 	/* Not safe to reset if it is currently in use */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr)) {
 		rc = -EBUSY;
 		goto err;
@@ -132,7 +132,7 @@ static ssize_t reset_store_afu(struct device *device,
 
 	rc = count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
@@ -247,7 +247,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
 	int rc = -EBUSY;
 
 	/* can't change this if we have a user */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr))
 		goto err;
 
@@ -271,7 +271,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
 	afu->current_mode = 0;
 	afu->num_procs = 0;
 
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 
 	if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
 		return rc;
@@ -280,7 +280,7 @@ static ssize_t mode_store(struct device *device, struct device_attribute *attr,
 
 	return count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 