aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.ibm.com>2019-02-13 16:54:37 -0500
committerPaul E. McKenney <paulmck@linux.ibm.com>2019-03-26 17:39:24 -0400
commitf5ad3991493c69d203d42b94d32349b54c58a3f1 (patch)
tree681ce80c3c203737842bbe73b06b8fa983a9e0ca
parent5cdfd174ea6c2dc1d331b61bdc9572698658600a (diff)
srcu: Remove cleanup_srcu_struct_quiesced()
The cleanup_srcu_struct_quiesced() function was added because NVME used WQ_MEM_RECLAIM workqueues and SRCU did not, which meant that NVME workqueues waiting on SRCU workqueues could result in deadlocks during low-memory conditions. However, SRCU now also has WQ_MEM_RECLAIM workqueues, so there is no longer a potential for deadlock. Furthermore, it turns out to be extremely hard to use cleanup_srcu_struct_quiesced() correctly due to the fact that SRCU callback invocation accesses the srcu_struct structure's per-CPU data area just after callbacks are invoked. Therefore, the usual practice of using srcu_barrier() to wait for callbacks to be invoked before invoking cleanup_srcu_struct_quiesced() fails because SRCU's callback-invocation workqueue handler might be delayed, which can result in cleanup_srcu_struct_quiesced() being invoked (and thus freeing the per-CPU data) before the SRCU's callback-invocation workqueue handler is finished using that per-CPU data. Nor is this a theoretical problem: KASAN emitted use-after-free warnings because of this problem on actual runs. In short, NVME can now safely invoke cleanup_srcu_struct(), which avoids the use-after-free scenario. And cleanup_srcu_struct_quiesced() is quite difficult to use safely. This commit therefore removes cleanup_srcu_struct_quiesced(), switching its sole user back to cleanup_srcu_struct(). This effectively reverts the following pair of commits: f7194ac32ca2 ("srcu: Add cleanup_srcu_struct_quiesced()") 4317228ad9b8 ("nvme: Avoid flush dependency in delete controller flow") Reported-by: Bart Van Assche <bvanassche@acm.org> Signed-off-by: Paul E. McKenney <paulmck@linux.ibm.com> Reviewed-by: Bart Van Assche <bvanassche@acm.org> Tested-by: Bart Van Assche <bvanassche@acm.org>
-rw-r--r--drivers/nvme/host/core.c2
-rw-r--r--include/linux/srcu.h36
-rw-r--r--kernel/rcu/rcutorture.c7
-rw-r--r--kernel/rcu/srcutiny.c9
-rw-r--r--kernel/rcu/srcutree.c30
5 files changed, 18 insertions, 66 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 470601980794..739c5b4830d7 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -388,7 +388,7 @@ static void nvme_free_ns_head(struct kref *ref)
388 nvme_mpath_remove_disk(head); 388 nvme_mpath_remove_disk(head);
389 ida_simple_remove(&head->subsys->ns_ida, head->instance); 389 ida_simple_remove(&head->subsys->ns_ida, head->instance);
390 list_del_init(&head->entry); 390 list_del_init(&head->entry);
391 cleanup_srcu_struct_quiesced(&head->srcu); 391 cleanup_srcu_struct(&head->srcu);
392 nvme_put_subsystem(head->subsys); 392 nvme_put_subsystem(head->subsys);
393 kfree(head); 393 kfree(head);
394} 394}
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index c495b2d51569..e432cc92c73d 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -56,45 +56,11 @@ struct srcu_struct { };
56 56
57void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, 57void call_srcu(struct srcu_struct *ssp, struct rcu_head *head,
58 void (*func)(struct rcu_head *head)); 58 void (*func)(struct rcu_head *head));
59void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); 59void cleanup_srcu_struct(struct srcu_struct *ssp);
60int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); 60int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp);
61void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); 61void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp);
62void synchronize_srcu(struct srcu_struct *ssp); 62void synchronize_srcu(struct srcu_struct *ssp);
63 63
64/**
65 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
66 * @ssp: structure to clean up.
67 *
68 * Must invoke this after you are finished using a given srcu_struct that
69 * was initialized via init_srcu_struct(), else you leak memory.
70 */
71static inline void cleanup_srcu_struct(struct srcu_struct *ssp)
72{
73 _cleanup_srcu_struct(ssp, false);
74}
75
76/**
77 * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
78 * @ssp: structure to clean up.
79 *
80 * Must invoke this after you are finished using a given srcu_struct that
81 * was initialized via init_srcu_struct(), else you leak memory. Also,
82 * all grace-period processing must have completed.
83 *
84 * "Completed" means that the last synchronize_srcu() and
85 * synchronize_srcu_expedited() calls must have returned before the call
86 * to cleanup_srcu_struct_quiesced(). It also means that the callback
87 * from the last call_srcu() must have been invoked before the call to
88 * cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
89 * with this last. Violating these rules will get you a WARN_ON() splat
90 * (with high probability, anyway), and will also cause the srcu_struct
91 * to be leaked.
92 */
93static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp)
94{
95 _cleanup_srcu_struct(ssp, true);
96}
97
98#ifdef CONFIG_DEBUG_LOCK_ALLOC 64#ifdef CONFIG_DEBUG_LOCK_ALLOC
99 65
100/** 66/**
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index f14d1b18a74f..d2b226110835 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -592,12 +592,7 @@ static void srcu_torture_init(void)
592 592
593static void srcu_torture_cleanup(void) 593static void srcu_torture_cleanup(void)
594{ 594{
595 static DEFINE_TORTURE_RANDOM(rand); 595 cleanup_srcu_struct(&srcu_ctld);
596
597 if (torture_random(&rand) & 0x800)
598 cleanup_srcu_struct(&srcu_ctld);
599 else
600 cleanup_srcu_struct_quiesced(&srcu_ctld);
601 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */ 596 srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
602} 597}
603 598
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 5d4a39a6505a..44d6606b8325 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -76,19 +76,16 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
76 * Must invoke this after you are finished using a given srcu_struct that 76 * Must invoke this after you are finished using a given srcu_struct that
77 * was initialized via init_srcu_struct(), else you leak memory. 77 * was initialized via init_srcu_struct(), else you leak memory.
78 */ 78 */
79void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) 79void cleanup_srcu_struct(struct srcu_struct *ssp)
80{ 80{
81 WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); 81 WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]);
82 if (quiesced) 82 flush_work(&ssp->srcu_work);
83 WARN_ON(work_pending(&ssp->srcu_work));
84 else
85 flush_work(&ssp->srcu_work);
86 WARN_ON(ssp->srcu_gp_running); 83 WARN_ON(ssp->srcu_gp_running);
87 WARN_ON(ssp->srcu_gp_waiting); 84 WARN_ON(ssp->srcu_gp_waiting);
88 WARN_ON(ssp->srcu_cb_head); 85 WARN_ON(ssp->srcu_cb_head);
89 WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); 86 WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail);
90} 87}
91EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); 88EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
92 89
93/* 90/*
94 * Removes the count for the old reader from the appropriate element of 91 * Removes the count for the old reader from the appropriate element of
diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c
index 4f30f3ecabc1..9b761e546de8 100644
--- a/kernel/rcu/srcutree.c
+++ b/kernel/rcu/srcutree.c
@@ -360,8 +360,14 @@ static unsigned long srcu_get_delay(struct srcu_struct *ssp)
360 return SRCU_INTERVAL; 360 return SRCU_INTERVAL;
361} 361}
362 362
363/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ 363/**
364void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) 364 * cleanup_srcu_struct - deconstruct a sleep-RCU structure
365 * @ssp: structure to clean up.
366 *
367 * Must invoke this after you are finished using a given srcu_struct that
368 * was initialized via init_srcu_struct(), else you leak memory.
369 */
370void cleanup_srcu_struct(struct srcu_struct *ssp)
365{ 371{
366 int cpu; 372 int cpu;
367 373
@@ -369,24 +375,12 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
369 return; /* Just leak it! */ 375 return; /* Just leak it! */
370 if (WARN_ON(srcu_readers_active(ssp))) 376 if (WARN_ON(srcu_readers_active(ssp)))
371 return; /* Just leak it! */ 377 return; /* Just leak it! */
372 if (quiesced) { 378 flush_delayed_work(&ssp->work);
373 if (WARN_ON(delayed_work_pending(&ssp->work)))
374 return; /* Just leak it! */
375 } else {
376 flush_delayed_work(&ssp->work);
377 }
378 for_each_possible_cpu(cpu) { 379 for_each_possible_cpu(cpu) {
379 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu); 380 struct srcu_data *sdp = per_cpu_ptr(ssp->sda, cpu);
380 381
381 if (quiesced) { 382 del_timer_sync(&sdp->delay_work);
382 if (WARN_ON(timer_pending(&sdp->delay_work))) 383 flush_work(&sdp->work);
383 return; /* Just leak it! */
384 if (WARN_ON(work_pending(&sdp->work)))
385 return; /* Just leak it! */
386 } else {
387 del_timer_sync(&sdp->delay_work);
388 flush_work(&sdp->work);
389 }
390 if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist))) 384 if (WARN_ON(rcu_segcblist_n_cbs(&sdp->srcu_cblist)))
391 return; /* Forgot srcu_barrier(), so just leak it! */ 385 return; /* Forgot srcu_barrier(), so just leak it! */
392 } 386 }
@@ -399,7 +393,7 @@ void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced)
399 free_percpu(ssp->sda); 393 free_percpu(ssp->sda);
400 ssp->sda = NULL; 394 ssp->sda = NULL;
401} 395}
402EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); 396EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
403 397
404/* 398/*
405 * Counts the new reader in the appropriate per-CPU element of the 399 * Counts the new reader in the appropriate per-CPU element of the