diff options
author | Mike Snitzer <snitzer@redhat.com> | 2013-12-20 14:27:28 -0500 |
---|---|---|
committer | Mike Snitzer <snitzer@redhat.com> | 2014-01-07 10:14:31 -0500 |
commit | 8b64e881eb40ac8b9bfcbce068a97eef819044ee (patch) | |
tree | 5340508878a1babeed49b841c84c061c847a719a | |
parent | 6d16202be7bca169771e2cec140a6c6c53ce9df5 (diff) |
dm thin: fix set_pool_mode exposed pool operation races
The pool mode must not be switched until after the corresponding pool
process_* methods have been established. Otherwise, because
set_pool_mode() isn't interlocked with the IO path for performance
reasons, the IO path can end up executing process_* operations that
don't match the mode. This patch eliminates problems like the following
(as seen on really fast PCIe SSD storage when transitioning the pool's
mode from PM_READ_ONLY to PM_WRITE):
kernel: device-mapper: thin: 253:2: reached low water mark for data device: sending event.
kernel: device-mapper: thin: 253:2: no free data space available.
kernel: device-mapper: thin: 253:2: switching pool to read-only mode
kernel: device-mapper: thin: 253:2: switching pool to write mode
kernel: ------------[ cut here ]------------
kernel: WARNING: CPU: 11 PID: 7564 at drivers/md/dm-thin.c:995 handle_unserviceable_bio+0x146/0x160 [dm_thin_pool]()
...
kernel: Workqueue: dm-thin do_worker [dm_thin_pool]
kernel: 00000000000003e3 ffff880308831cc8 ffffffff8152ebcb 00000000000003e3
kernel: 0000000000000000 ffff880308831d08 ffffffff8104c46c ffff88032502a800
kernel: ffff880036409000 ffff88030ec7ce00 0000000000000001 00000000ffffffc3
kernel: Call Trace:
kernel: [<ffffffff8152ebcb>] dump_stack+0x49/0x5e
kernel: [<ffffffff8104c46c>] warn_slowpath_common+0x8c/0xc0
kernel: [<ffffffff8104c4ba>] warn_slowpath_null+0x1a/0x20
kernel: [<ffffffffa001e2c6>] handle_unserviceable_bio+0x146/0x160 [dm_thin_pool]
kernel: [<ffffffffa001f276>] process_bio_read_only+0x136/0x180 [dm_thin_pool]
kernel: [<ffffffffa0020b75>] process_deferred_bios+0xc5/0x230 [dm_thin_pool]
kernel: [<ffffffffa0020d31>] do_worker+0x51/0x60 [dm_thin_pool]
kernel: [<ffffffff81067823>] process_one_work+0x183/0x490
kernel: [<ffffffff81068c70>] worker_thread+0x120/0x3a0
kernel: [<ffffffff81068b50>] ? manage_workers+0x160/0x160
kernel: [<ffffffff8106e86e>] kthread+0xce/0xf0
kernel: [<ffffffff8106e7a0>] ? kthread_freezable_should_stop+0x70/0x70
kernel: [<ffffffff8153b3ec>] ret_from_fork+0x7c/0xb0
kernel: [<ffffffff8106e7a0>] ? kthread_freezable_should_stop+0x70/0x70
kernel: ---[ end trace 3f00528e08ffa55c ]---
kernel: device-mapper: thin: pool mode is PM_WRITE not PM_READ_ONLY like expected!?
dm-thin.c:995 was the WARN_ON_ONCE(get_pool_mode(pool) != PM_READ_ONLY);
at the top of handle_unserviceable_bio(). And as the additional
debugging I had conveys: the pool mode was _not_ PM_READ_ONLY like
expected, it was already PM_WRITE, yet pool->process_bio was still set
to process_bio_read_only().
Also, while fixing this up, reduce logging of redundant pool mode
transitions by checking new_mode is different from old_mode.
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Cc: stable@vger.kernel.org
-rw-r--r-- | drivers/md/dm-thin.c | 40 |
1 files changed, 27 insertions, 13 deletions
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a55c5ebb4031..d2328bb05192 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c | |||
@@ -1392,16 +1392,16 @@ static enum pool_mode get_pool_mode(struct pool *pool) | |||
1392 | return pool->pf.mode; | 1392 | return pool->pf.mode; |
1393 | } | 1393 | } |
1394 | 1394 | ||
1395 | static void set_pool_mode(struct pool *pool, enum pool_mode mode) | 1395 | static void set_pool_mode(struct pool *pool, enum pool_mode new_mode) |
1396 | { | 1396 | { |
1397 | int r; | 1397 | int r; |
1398 | enum pool_mode old_mode = pool->pf.mode; | ||
1398 | 1399 | ||
1399 | pool->pf.mode = mode; | 1400 | switch (new_mode) { |
1400 | |||
1401 | switch (mode) { | ||
1402 | case PM_FAIL: | 1401 | case PM_FAIL: |
1403 | DMERR("%s: switching pool to failure mode", | 1402 | if (old_mode != new_mode) |
1404 | dm_device_name(pool->pool_md)); | 1403 | DMERR("%s: switching pool to failure mode", |
1404 | dm_device_name(pool->pool_md)); | ||
1405 | dm_pool_metadata_read_only(pool->pmd); | 1405 | dm_pool_metadata_read_only(pool->pmd); |
1406 | pool->process_bio = process_bio_fail; | 1406 | pool->process_bio = process_bio_fail; |
1407 | pool->process_discard = process_bio_fail; | 1407 | pool->process_discard = process_bio_fail; |
@@ -1410,13 +1410,15 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) | |||
1410 | break; | 1410 | break; |
1411 | 1411 | ||
1412 | case PM_READ_ONLY: | 1412 | case PM_READ_ONLY: |
1413 | DMERR("%s: switching pool to read-only mode", | 1413 | if (old_mode != new_mode) |
1414 | dm_device_name(pool->pool_md)); | 1414 | DMERR("%s: switching pool to read-only mode", |
1415 | dm_device_name(pool->pool_md)); | ||
1415 | r = dm_pool_abort_metadata(pool->pmd); | 1416 | r = dm_pool_abort_metadata(pool->pmd); |
1416 | if (r) { | 1417 | if (r) { |
1417 | DMERR("%s: aborting transaction failed", | 1418 | DMERR("%s: aborting transaction failed", |
1418 | dm_device_name(pool->pool_md)); | 1419 | dm_device_name(pool->pool_md)); |
1419 | set_pool_mode(pool, PM_FAIL); | 1420 | new_mode = PM_FAIL; |
1421 | set_pool_mode(pool, new_mode); | ||
1420 | } else { | 1422 | } else { |
1421 | dm_pool_metadata_read_only(pool->pmd); | 1423 | dm_pool_metadata_read_only(pool->pmd); |
1422 | pool->process_bio = process_bio_read_only; | 1424 | pool->process_bio = process_bio_read_only; |
@@ -1427,6 +1429,9 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) | |||
1427 | break; | 1429 | break; |
1428 | 1430 | ||
1429 | case PM_WRITE: | 1431 | case PM_WRITE: |
1432 | if (old_mode != new_mode) | ||
1433 | DMINFO("%s: switching pool to write mode", | ||
1434 | dm_device_name(pool->pool_md)); | ||
1430 | dm_pool_metadata_read_write(pool->pmd); | 1435 | dm_pool_metadata_read_write(pool->pmd); |
1431 | pool->process_bio = process_bio; | 1436 | pool->process_bio = process_bio; |
1432 | pool->process_discard = process_discard; | 1437 | pool->process_discard = process_discard; |
@@ -1434,6 +1439,8 @@ static void set_pool_mode(struct pool *pool, enum pool_mode mode) | |||
1434 | pool->process_prepared_discard = process_prepared_discard; | 1439 | pool->process_prepared_discard = process_prepared_discard; |
1435 | break; | 1440 | break; |
1436 | } | 1441 | } |
1442 | |||
1443 | pool->pf.mode = new_mode; | ||
1437 | } | 1444 | } |
1438 | 1445 | ||
1439 | /* | 1446 | /* |
@@ -1677,6 +1684,17 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) | |||
1677 | enum pool_mode new_mode = pt->adjusted_pf.mode; | 1684 | enum pool_mode new_mode = pt->adjusted_pf.mode; |
1678 | 1685 | ||
1679 | /* | 1686 | /* |
1687 | * Don't change the pool's mode until set_pool_mode() below. | ||
1688 | * Otherwise the pool's process_* function pointers may | ||
1689 | * not match the desired pool mode. | ||
1690 | */ | ||
1691 | pt->adjusted_pf.mode = old_mode; | ||
1692 | |||
1693 | pool->ti = ti; | ||
1694 | pool->pf = pt->adjusted_pf; | ||
1695 | pool->low_water_blocks = pt->low_water_blocks; | ||
1696 | |||
1697 | /* | ||
1680 | * If we were in PM_FAIL mode, rollback of metadata failed. We're | 1698 | * If we were in PM_FAIL mode, rollback of metadata failed. We're |
1681 | * not going to recover without a thin_repair. So we never let the | 1699 | * not going to recover without a thin_repair. So we never let the |
1682 | * pool move out of the old mode. On the other hand a PM_READ_ONLY | 1700 | * pool move out of the old mode. On the other hand a PM_READ_ONLY |
@@ -1686,10 +1704,6 @@ static int bind_control_target(struct pool *pool, struct dm_target *ti) | |||
1686 | if (old_mode == PM_FAIL) | 1704 | if (old_mode == PM_FAIL) |
1687 | new_mode = old_mode; | 1705 | new_mode = old_mode; |
1688 | 1706 | ||
1689 | pool->ti = ti; | ||
1690 | pool->low_water_blocks = pt->low_water_blocks; | ||
1691 | pool->pf = pt->adjusted_pf; | ||
1692 | |||
1693 | set_pool_mode(pool, new_mode); | 1707 | set_pool_mode(pool, new_mode); |
1694 | 1708 | ||
1695 | return 0; | 1709 | return 0; |