aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJoe Thornber <ejt@redhat.com>2014-04-08 06:29:01 -0400
committerMike Snitzer <snitzer@redhat.com>2014-04-08 10:18:35 -0400
commitb10ebd34cccae1b431caf1be54919aede2be7cbe (patch)
treef8f848169991555d7ea0719740da37645e405c99
parent5e3283e2920a0bd8a806964d80274b8756e0dd7f (diff)
dm thin: fix rcu_read_lock being held in code that can sleep
Commit c140e1c4e23 ("dm thin: use per thin device deferred bio lists") introduced the use of an rculist for all active thin devices. The use of rcu_read_lock() in process_deferred_bios() can result in a BUG if a dm_bio_prison_cell must be allocated as a side-effect of bio_detain(): BUG: sleeping function called from invalid context at mm/mempool.c:203 in_atomic(): 1, irqs_disabled(): 0, pid: 6, name: kworker/u8:0 3 locks held by kworker/u8:0/6: #0: ("dm-" "thin"){.+.+..}, at: [<ffffffff8106be42>] process_one_work+0x192/0x550 #1: ((&pool->worker)){+.+...}, at: [<ffffffff8106be42>] process_one_work+0x192/0x550 #2: (rcu_read_lock){.+.+..}, at: [<ffffffff816360b5>] do_worker+0x5/0x4d0 We can't process deferred bios with the rcu lock held, since dm_bio_prison_cell allocation may block if the bio-prison's cell mempool is exhausted. To fix: - Introduce a refcount and completion field to each thin_c - Add thin_get/put methods for adjusting the refcount. If the refcount hits zero then the completion is triggered. - Initialise refcount to 1 when creating thin_c - When iterating the active_thins list we thin_get() whilst the rcu lock is held. - After the rcu lock is dropped we process the deferred bios for that thin. - When destroying a thin_c we thin_put() and then wait for the completion -- to avoid a race between the worker thread iterating from that thin_c and destroying the thin_c. Signed-off-by: Joe Thornber <ejt@redhat.com> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r--drivers/md/dm-thin.c70
1 files changed, 67 insertions, 3 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index ae5fd0b9c75c..28fc282b61b2 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -232,6 +232,13 @@ struct thin_c {
232 struct bio_list deferred_bio_list; 232 struct bio_list deferred_bio_list;
233 struct bio_list retry_on_resume_list; 233 struct bio_list retry_on_resume_list;
234 struct rb_root sort_bio_list; /* sorted list of deferred bios */ 234 struct rb_root sort_bio_list; /* sorted list of deferred bios */
235
236 /*
237 * Ensures the thin is not destroyed until the worker has finished
238 * iterating the active_thins list.
239 */
240 atomic_t refcount;
241 struct completion can_destroy;
235}; 242};
236 243
237/*----------------------------------------------------------------*/ 244/*----------------------------------------------------------------*/
@@ -1486,6 +1493,45 @@ static void process_thin_deferred_bios(struct thin_c *tc)
1486 blk_finish_plug(&plug); 1493 blk_finish_plug(&plug);
1487} 1494}
1488 1495
1496static void thin_get(struct thin_c *tc);
1497static void thin_put(struct thin_c *tc);
1498
1499/*
1500 * We can't hold rcu_read_lock() around code that can block. So we
1501 * find a thin with the rcu lock held; bump a refcount; then drop
1502 * the lock.
1503 */
1504static struct thin_c *get_first_thin(struct pool *pool)
1505{
1506 struct thin_c *tc = NULL;
1507
1508 rcu_read_lock();
1509 if (!list_empty(&pool->active_thins)) {
1510 tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
1511 thin_get(tc);
1512 }
1513 rcu_read_unlock();
1514
1515 return tc;
1516}
1517
1518static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc)
1519{
1520 struct thin_c *old_tc = tc;
1521
1522 rcu_read_lock();
1523 list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) {
1524 thin_get(tc);
1525 thin_put(old_tc);
1526 rcu_read_unlock();
1527 return tc;
1528 }
1529 thin_put(old_tc);
1530 rcu_read_unlock();
1531
1532 return NULL;
1533}
1534
1489static void process_deferred_bios(struct pool *pool) 1535static void process_deferred_bios(struct pool *pool)
1490{ 1536{
1491 unsigned long flags; 1537 unsigned long flags;
@@ -1493,10 +1539,11 @@ static void process_deferred_bios(struct pool *pool)
1493 struct bio_list bios; 1539 struct bio_list bios;
1494 struct thin_c *tc; 1540 struct thin_c *tc;
1495 1541
1496 rcu_read_lock(); 1542 tc = get_first_thin(pool);
1497 list_for_each_entry_rcu(tc, &pool->active_thins, list) 1543 while (tc) {
1498 process_thin_deferred_bios(tc); 1544 process_thin_deferred_bios(tc);
1499 rcu_read_unlock(); 1545 tc = get_next_thin(pool, tc);
1546 }
1500 1547
1501 /* 1548 /*
1502 * If there are any deferred flush bios, we must commit 1549 * If there are any deferred flush bios, we must commit
@@ -3061,11 +3108,25 @@ static struct target_type pool_target = {
3061/*---------------------------------------------------------------- 3108/*----------------------------------------------------------------
3062 * Thin target methods 3109 * Thin target methods
3063 *--------------------------------------------------------------*/ 3110 *--------------------------------------------------------------*/
3111static void thin_get(struct thin_c *tc)
3112{
3113 atomic_inc(&tc->refcount);
3114}
3115
3116static void thin_put(struct thin_c *tc)
3117{
3118 if (atomic_dec_and_test(&tc->refcount))
3119 complete(&tc->can_destroy);
3120}
3121
3064static void thin_dtr(struct dm_target *ti) 3122static void thin_dtr(struct dm_target *ti)
3065{ 3123{
3066 struct thin_c *tc = ti->private; 3124 struct thin_c *tc = ti->private;
3067 unsigned long flags; 3125 unsigned long flags;
3068 3126
3127 thin_put(tc);
3128 wait_for_completion(&tc->can_destroy);
3129
3069 spin_lock_irqsave(&tc->pool->lock, flags); 3130 spin_lock_irqsave(&tc->pool->lock, flags);
3070 list_del_rcu(&tc->list); 3131 list_del_rcu(&tc->list);
3071 spin_unlock_irqrestore(&tc->pool->lock, flags); 3132 spin_unlock_irqrestore(&tc->pool->lock, flags);
@@ -3192,6 +3253,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
3192 3253
3193 mutex_unlock(&dm_thin_pool_table.mutex); 3254 mutex_unlock(&dm_thin_pool_table.mutex);
3194 3255
3256 atomic_set(&tc->refcount, 1);
3257 init_completion(&tc->can_destroy);
3258
3195 spin_lock_irqsave(&tc->pool->lock, flags); 3259 spin_lock_irqsave(&tc->pool->lock, flags);
3196 list_add_tail_rcu(&tc->list, &tc->pool->active_thins); 3260 list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
3197 spin_unlock_irqrestore(&tc->pool->lock, flags); 3261 spin_unlock_irqrestore(&tc->pool->lock, flags);