diff options
author | Joe Thornber <ejt@redhat.com> | 2014-04-08 06:29:01 -0400 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2014-04-08 10:18:35 -0400 |
commit | b10ebd34cccae1b431caf1be54919aede2be7cbe (patch) | |
tree | f8f848169991555d7ea0719740da37645e405c99 | |
parent | 5e3283e2920a0bd8a806964d80274b8756e0dd7f (diff) |
dm thin: fix rcu_read_lock being held in code that can sleep
Commit c140e1c4e23 ("dm thin: use per thin device deferred bio lists")
introduced the use of an rculist for all active thin devices. The use
of rcu_read_lock() in process_deferred_bios() can result in a BUG if a
dm_bio_prison_cell must be allocated as a side-effect of bio_detain():
BUG: sleeping function called from invalid context at mm/mempool.c:203
in_atomic(): 1, irqs_disabled(): 0, pid: 6, name: kworker/u8:0
3 locks held by kworker/u8:0/6:
#0: ("dm-" "thin"){.+.+..}, at: [<ffffffff8106be42>] process_one_work+0x192/0x550
#1: ((&pool->worker)){+.+...}, at: [<ffffffff8106be42>] process_one_work+0x192/0x550
#2: (rcu_read_lock){.+.+..}, at: [<ffffffff816360b5>] do_worker+0x5/0x4d0
We can't process deferred bios with the rcu lock held, since
dm_bio_prison_cell allocation may block if the bio-prison's cell mempool
is exhausted.
To fix:
- Introduce a refcount and completion field to each thin_c
- Add thin_get()/thin_put() functions for adjusting the refcount. If the
refcount hits zero then the completion is triggered.
- Initialise refcount to 1 when creating thin_c
- When iterating the active_thins list we thin_get() whilst the rcu
lock is held.
- After the rcu lock is dropped we process the deferred bios for that
thin.
- When destroying a thin_c we thin_put() and then wait for the
completion -- this avoids a race between the worker thread, which may
still be iterating from that thin_c, and the destruction of the thin_c.
Signed-off-by: Joe Thornber <ejt@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
-rw-r--r-- | drivers/md/dm-thin.c | 70 |
1 files changed, 67 insertions, 3 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index ae5fd0b9c75c..28fc282b61b2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
@@ -232,6 +232,13 @@ struct thin_c { | |||
232 | struct bio_list deferred_bio_list; | 232 | struct bio_list deferred_bio_list; |
233 | struct bio_list retry_on_resume_list; | 233 | struct bio_list retry_on_resume_list; |
234 | struct rb_root sort_bio_list; /* sorted list of deferred bios */ | 234 | struct rb_root sort_bio_list; /* sorted list of deferred bios */ |
235 | |||
236 | /* | ||
237 | * Ensures the thin is not destroyed until the worker has finished | ||
238 | * iterating the active_thins list. | ||
239 | */ | ||
240 | atomic_t refcount; | ||
241 | struct completion can_destroy; | ||
235 | }; | 242 | }; |
236 | 243 | ||
237 | /*----------------------------------------------------------------*/ | 244 | /*----------------------------------------------------------------*/ |
@@ -1486,6 +1493,45 @@ static void process_thin_deferred_bios(struct thin_c *tc) | |||
1486 | blk_finish_plug(&plug); | 1493 | blk_finish_plug(&plug); |
1487 | } | 1494 | } |
1488 | 1495 | ||
1496 | static void thin_get(struct thin_c *tc); | ||
1497 | static void thin_put(struct thin_c *tc); | ||
1498 | |||
1499 | /* | ||
1500 | * We can't hold rcu_read_lock() around code that can block. So we | ||
1501 | * find a thin with the rcu lock held; bump a refcount; then drop | ||
1502 | * the lock. | ||
1503 | */ | ||
1504 | static struct thin_c *get_first_thin(struct pool *pool) | ||
1505 | { | ||
1506 | struct thin_c *tc = NULL; | ||
1507 | |||
1508 | rcu_read_lock(); | ||
1509 | if (!list_empty(&pool->active_thins)) { | ||
1510 | tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list); | ||
1511 | thin_get(tc); | ||
1512 | } | ||
1513 | rcu_read_unlock(); | ||
1514 | |||
1515 | return tc; | ||
1516 | } | ||
1517 | |||
1518 | static struct thin_c *get_next_thin(struct pool *pool, struct thin_c *tc) | ||
1519 | { | ||
1520 | struct thin_c *old_tc = tc; | ||
1521 | |||
1522 | rcu_read_lock(); | ||
1523 | list_for_each_entry_continue_rcu(tc, &pool->active_thins, list) { | ||
1524 | thin_get(tc); | ||
1525 | thin_put(old_tc); | ||
1526 | rcu_read_unlock(); | ||
1527 | return tc; | ||
1528 | } | ||
1529 | thin_put(old_tc); | ||
1530 | rcu_read_unlock(); | ||
1531 | |||
1532 | return NULL; | ||
1533 | } | ||
1534 | |||
1489 | static void process_deferred_bios(struct pool *pool) | 1535 | static void process_deferred_bios(struct pool *pool) |
1490 | { | 1536 | { |
1491 | unsigned long flags; | 1537 | unsigned long flags; |
@@ -1493,10 +1539,11 @@ static void process_deferred_bios(struct pool *pool) | |||
1493 | struct bio_list bios; | 1539 | struct bio_list bios; |
1494 | struct thin_c *tc; | 1540 | struct thin_c *tc; |
1495 | 1541 | ||
1496 | rcu_read_lock(); | 1542 | tc = get_first_thin(pool); |
1497 | list_for_each_entry_rcu(tc, &pool->active_thins, list) | 1543 | while (tc) { |
1498 | process_thin_deferred_bios(tc); | 1544 | process_thin_deferred_bios(tc); |
1499 | rcu_read_unlock(); | 1545 | tc = get_next_thin(pool, tc); |
1546 | } | ||
1500 | 1547 | ||
1501 | /* | 1548 | /* |
1502 | * If there are any deferred flush bios, we must commit | 1549 | * If there are any deferred flush bios, we must commit |
@@ -3061,11 +3108,25 @@ static struct target_type pool_target = { | |||
3061 | /*---------------------------------------------------------------- | 3108 | /*---------------------------------------------------------------- |
3062 | * Thin target methods | 3109 | * Thin target methods |
3063 | *--------------------------------------------------------------*/ | 3110 | *--------------------------------------------------------------*/ |
3111 | static void thin_get(struct thin_c *tc) | ||
3112 | { | ||
3113 | atomic_inc(&tc->refcount); | ||
3114 | } | ||
3115 | |||
3116 | static void thin_put(struct thin_c *tc) | ||
3117 | { | ||
3118 | if (atomic_dec_and_test(&tc->refcount)) | ||
3119 | complete(&tc->can_destroy); | ||
3120 | } | ||
3121 | |||
3064 | static void thin_dtr(struct dm_target *ti) | 3122 | static void thin_dtr(struct dm_target *ti) |
3065 | { | 3123 | { |
3066 | struct thin_c *tc = ti->private; | 3124 | struct thin_c *tc = ti->private; |
3067 | unsigned long flags; | 3125 | unsigned long flags; |
3068 | 3126 | ||
3127 | thin_put(tc); | ||
3128 | wait_for_completion(&tc->can_destroy); | ||
3129 | |||
3069 | spin_lock_irqsave(&tc->pool->lock, flags); | 3130 | spin_lock_irqsave(&tc->pool->lock, flags); |
3070 | list_del_rcu(&tc->list); | 3131 | list_del_rcu(&tc->list); |
3071 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 3132 | spin_unlock_irqrestore(&tc->pool->lock, flags); |
@@ -3192,6 +3253,9 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) | |||
3192 | 3253 | ||
3193 | mutex_unlock(&dm_thin_pool_table.mutex); | 3254 | mutex_unlock(&dm_thin_pool_table.mutex); |
3194 | 3255 | ||
3256 | atomic_set(&tc->refcount, 1); | ||
3257 | init_completion(&tc->can_destroy); | ||
3258 | |||
3195 | spin_lock_irqsave(&tc->pool->lock, flags); | 3259 | spin_lock_irqsave(&tc->pool->lock, flags); |
3196 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); | 3260 | list_add_tail_rcu(&tc->list, &tc->pool->active_thins); |
3197 | spin_unlock_irqrestore(&tc->pool->lock, flags); | 3261 | spin_unlock_irqrestore(&tc->pool->lock, flags); |