aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Snitzer <snitzer@redhat.com>2013-12-20 14:27:28 -0500
committerMike Snitzer <snitzer@redhat.com>2014-01-07 10:14:31 -0500
commit8b64e881eb40ac8b9bfcbce068a97eef819044ee (patch)
tree5340508878a1babeed49b841c84c061c847a719a
parent6d16202be7bca169771e2cec140a6c6c53ce9df5 (diff)
dm thin: fix set_pool_mode exposed pool operation races
The pool mode must not be switched until after the corresponding pool process_* methods have been established. Otherwise, because set_pool_mode() isn't interlocked with the IO path for performance reasons, the IO path can end up executing process_* operations that don't match the mode. This patch eliminates problems like the following (as seen on really fast PCIe SSD storage when transitioning the pool's mode from PM_READ_ONLY to PM_WRITE): kernel: device-mapper: thin: 253:2: reached low water mark for data device: sending event. kernel: device-mapper: thin: 253:2: no free data space available. kernel: device-mapper: thin: 253:2: switching pool to read-only mode kernel: device-mapper: thin: 253:2: switching pool to write mode kernel: ------------[ cut here ]------------ kernel: WARNING: CPU: 11 PID: 7564 at drivers/md/dm-thin.c:995 handle_unserviceable_bio+0x146/0x160 [dm_thin_pool]() ... kernel: Workqueue: dm-thin do_worker [dm_thin_pool] kernel: 00000000000003e3 ffff880308831cc8 ffffffff8152ebcb 00000000000003e3 kernel: 0000000000000000 ffff880308831d08 ffffffff8104c46c ffff88032502a800 kernel: ffff880036409000 ffff88030ec7ce00 0000000000000001 00000000ffffffc3 kernel: Call Trace: kernel: [<ffffffff8152ebcb>] dump_stack+0x49/0x5e kernel: [<ffffffff8104c46c>] warn_slowpath_common+0x8c/0xc0 kernel: [<ffffffff8104c4ba>] warn_slowpath_null+0x1a/0x20 kernel: [<ffffffffa001e2c6>] handle_unserviceable_bio+0x146/0x160 [dm_thin_pool] kernel: [<ffffffffa001f276>] process_bio_read_only+0x136/0x180 [dm_thin_pool] kernel: [<ffffffffa0020b75>] process_deferred_bios+0xc5/0x230 [dm_thin_pool] kernel: [<ffffffffa0020d31>] do_worker+0x51/0x60 [dm_thin_pool] kernel: [<ffffffff81067823>] process_one_work+0x183/0x490 kernel: [<ffffffff81068c70>] worker_thread+0x120/0x3a0 kernel: [<ffffffff81068b50>] ? manage_workers+0x160/0x160 kernel: [<ffffffff8106e86e>] kthread+0xce/0xf0 kernel: [<ffffffff8106e7a0>] ? kthread_freezable_should_stop+0x70/0x70 kernel: [<ffffffff8153b3ec>] ret_from_fork+0x7c/0xb0 kernel: [<ffffffff8106e7a0>] ? kthread_freezable_should_stop+0x70/0x70 kernel: ---[ end trace 3f00528e08ffa55c ]--- kernel: device-mapper: thin: pool mode is PM_WRITE not PM_READ_ONLY like expected!? dm-thin.c:995 was the WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY); at the top of handle_unserviceable_bio(). And as the additional debugging I had conveys: the pool mode was _not_ PM_READ_ONLY like expected, it was already PM_WRITE, yet pool->process_bio was still set to process_bio_read_only(). Also, while fixing this up, reduce logging of redundant pool mode transitions by checking new_mode is different from old_mode. Signed-off-by: Mike Snitzer <snitzer@redhat.com> Cc: stable@vger.kernel.org
-rw-r--r--drivers/md/dm-thin.c40
1 files changed, 27 insertions, 13 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index a55c5ebb4031..d2328bb05192 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1392,16 +1392,16 @@ static enum pool_mode get_pool_mode(struct pool *pool)
1392 return pool->pf.mode; 1392 return pool->pf.mode;
1393} 1393}
1394 1394
1395static void set_pool_mode(struct pool *pool, enum pool_mode mode) 1395static void set_pool_mode(struct pool *pool, enum pool_mode new_mode)
1396{ 1396{
1397 int r; 1397 int r;
1398 enum pool_mode old_mode = pool->pf.mode;
1398 1399
1399 pool->pf.mode = mode; 1400 switch (new_mode) {
1400
1401 switch (mode) {
1402 case PM_FAIL: 1401 case PM_FAIL:
1403 DMERR("%s: switching pool to failure mode", 1402 if (old_mode != new_mode)
1404 dm_device_name(pool->pool_md)); 1403 DMERR("%s: switching pool to failure mode",
1404 dm_device_name(pool->pool_md));
1405 dm_pool_metadata_read_only(pool->pmd); 1405 dm_pool_metadata_read_only(pool->pmd);
1406 pool->process_bio = process_bio_fail; 1406 pool->process_bio = process_bio_fail;
1407 pool->process_discard = process_bio_fail; 1407 pool->process_discard = process_bio_fail;
@@ -1410,13 +1410,15 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
1410 break; 1410 break;
1411 1411
1412 case PM_READ_ONLY: 1412 case PM_READ_ONLY:
1413 DMERR("%s: switching pool to read-only mode", 1413 if (old_mode != new_mode)
1414 dm_device_name(pool->pool_md)); 1414 DMERR("%s: switching pool to read-only mode",
1415 dm_device_name(pool->pool_md));
1415 r = dm_pool_abort_metadata(pool->pmd); 1416 r = dm_pool_abort_metadata(pool->pmd);
1416 if (r) { 1417 if (r) {
1417 DMERR("%s: aborting transaction failed", 1418 DMERR("%s: aborting transaction failed",
1418 dm_device_name(pool->pool_md)); 1419 dm_device_name(pool->pool_md));
1419 set_pool_mode(pool, PM_FAIL); 1420 new_mode = PM_FAIL;
1421 set_pool_mode(pool, new_mode);
1420 } else { 1422 } else {
1421 dm_pool_metadata_read_only(pool->pmd); 1423 dm_pool_metadata_read_only(pool->pmd);
1422 pool->process_bio = process_bio_read_only; 1424 pool->process_bio = process_bio_read_only;
@@ -1427,6 +1429,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
1427 break; 1429 break;
1428 1430
1429 case PM_WRITE: 1431 case PM_WRITE:
1432 if (old_mode != new_mode)
1433 DMINFO("%s: switching pool to write mode",
1434 dm_device_name(pool->pool_md));
1430 dm_pool_metadata_read_write(pool->pmd); 1435 dm_pool_metadata_read_write(pool->pmd);
1431 pool->process_bio = process_bio; 1436 pool->process_bio = process_bio;
1432 pool->process_discard = process_discard; 1437 pool->process_discard = process_discard;
@@ -1434,6 +1439,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode)
1434 pool->process_prepared_discard = process_prepared_discard; 1439 pool->process_prepared_discard = process_prepared_discard;
1435 break; 1440 break;
1436 } 1441 }
1442
1443 pool->pf.mode = new_mode;
1437} 1444}
1438 1445
1439/* 1446/*
@@ -1677,6 +1684,17 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
1677 enum pool_mode new_mode = pt->adjusted_pf.mode; 1684 enum pool_mode new_mode = pt->adjusted_pf.mode;
1678 1685
1679 /* 1686 /*
1687 * Don't change the pool's mode until set_pool_mode() below.
1688 * Otherwise the pool's process_* function pointers may
1689 * not match the desired pool mode.
1690 */
1691 pt->adjusted_pf.mode = old_mode;
1692
1693 pool->ti = ti;
1694 pool->pf = pt->adjusted_pf;
1695 pool->low_water_blocks = pt->low_water_blocks;
1696
1697 /*
1680 * If we were in PM_FAIL mode, rollback of metadata failed. We're 1698 * If we were in PM_FAIL mode, rollback of metadata failed. We're
1681 * not going to recover without a thin_repair. So we never let the 1699 * not going to recover without a thin_repair. So we never let the
1682 * pool move out of the old mode. On the other hand a PM_READ_ONLY 1700 * pool move out of the old mode. On the other hand a PM_READ_ONLY
@@ -1686,10 +1704,6 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti)
1686 if (old_mode == PM_FAIL) 1704 if (old_mode == PM_FAIL)
1687 new_mode = old_mode; 1705 new_mode = old_mode;
1688 1706
1689 pool->ti = ti;
1690 pool->low_water_blocks = pt->low_water_blocks;
1691 pool->pf = pt->adjusted_pf;
1692
1693 set_pool_mode(pool, new_mode); 1707 set_pool_mode(pool, new_mode);
1694 1708
1695 return 0; 1709 return 0;