diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2010-09-14 14:26:27 -0400 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2010-10-14 12:38:50 -0400 |
commit | e9e6f3ec535d7b7c9e2ca64ad691e743e7d3c2f0 (patch) | |
tree | cbc17d81b9d937b4fc515548f30f5ed00be193ee /drivers/block | |
parent | 22cc37a943832c948808884604ec6f5ff2594c1d (diff) |
drbd: fix for possible deadlock on IO error during resync
Scenario:
Something (say, flush-147:0) is in drbd_al_begin_io,
holding a local_cnt, waiting for the resync to make progress.
Disk fails, worker in after_state_ch does drbd_rs_cancel_all,
then waits for local_cnt to drop to zero.
flush-147:0 is woken by drbd_rs_cancel_all, needs to write an AL
transaction, and queues that on the worker.
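Deadlock: the worker is blocked waiting for local_cnt to reach zero, so it
never processes the queued AL transaction, and flush-147:0 therefore never
drops its local_cnt reference.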
Fix: do not wait in the worker. Instead, have put_ldev() trigger the
state change D_FAILED -> D_DISKLESS once local_cnt drops to zero while the disk is D_FAILED.
put_ldev() cannot do the state change directly, as its callers may
already hold various spinlocks. We queue a short work item instead.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 8 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_main.c | 68 |
2 files changed, 54 insertions, 22 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 8ab6fed39539..c07c370c4c82 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -852,6 +852,7 @@ enum { | |||
852 | BITMAP_IO, /* suspend application io; | 852 | BITMAP_IO, /* suspend application io; |
853 | once no more io in flight, start bitmap io */ | 853 | once no more io in flight, start bitmap io */ |
854 | BITMAP_IO_QUEUED, /* Started bitmap IO */ | 854 | BITMAP_IO_QUEUED, /* Started bitmap IO */ |
855 | GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */ | ||
855 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ | 856 | RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ |
856 | NET_CONGESTED, /* The data socket is congested */ | 857 | NET_CONGESTED, /* The data socket is congested */ |
857 | 858 | ||
@@ -976,6 +977,7 @@ struct drbd_conf { | |||
976 | unsigned int ko_count; | 977 | unsigned int ko_count; |
977 | struct drbd_work resync_work, | 978 | struct drbd_work resync_work, |
978 | unplug_work, | 979 | unplug_work, |
980 | go_diskless, | ||
979 | md_sync_work; | 981 | md_sync_work; |
980 | struct timer_list resync_timer; | 982 | struct timer_list resync_timer; |
981 | struct timer_list md_sync_timer; | 983 | struct timer_list md_sync_timer; |
@@ -1278,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, | |||
1278 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); | 1280 | extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); |
1279 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); | 1281 | extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); |
1280 | extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); | 1282 | extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); |
1283 | extern void drbd_go_diskless(struct drbd_conf *mdev); | ||
1281 | 1284 | ||
1282 | 1285 | ||
1283 | /* Meta data layout | 1286 | /* Meta data layout |
@@ -2123,8 +2126,11 @@ static inline void put_ldev(struct drbd_conf *mdev) | |||
2123 | int i = atomic_dec_return(&mdev->local_cnt); | 2126 | int i = atomic_dec_return(&mdev->local_cnt); |
2124 | __release(local); | 2127 | __release(local); |
2125 | D_ASSERT(i >= 0); | 2128 | D_ASSERT(i >= 0); |
2126 | if (i == 0) | 2129 | if (i == 0) { |
2130 | if (mdev->state.disk == D_FAILED) | ||
2131 | drbd_go_diskless(mdev); | ||
2127 | wake_up(&mdev->misc_wait); | 2132 | wake_up(&mdev->misc_wait); |
2133 | } | ||
2128 | } | 2134 | } |
2129 | 2135 | ||
2130 | #ifndef __CHECKER__ | 2136 | #ifndef __CHECKER__ |
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 63f45d730f3f..f89b97466d07 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c | |||
@@ -77,6 +77,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
77 | static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); | 77 | static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); |
78 | static void md_sync_timer_fn(unsigned long data); | 78 | static void md_sync_timer_fn(unsigned long data); |
79 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); | 79 | static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); |
80 | static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused); | ||
80 | 81 | ||
81 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " | 82 | MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " |
82 | "Lars Ellenberg <lars@linbit.com>"); | 83 | "Lars Ellenberg <lars@linbit.com>"); |
@@ -1363,42 +1364,46 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os, | |||
1363 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) | 1364 | os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) |
1364 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); | 1365 | drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); |
1365 | 1366 | ||
1367 | /* first half of local IO error */ | ||
1366 | if (os.disk > D_FAILED && ns.disk == D_FAILED) { | 1368 | if (os.disk > D_FAILED && ns.disk == D_FAILED) { |
1367 | enum drbd_io_error_p eh; | 1369 | enum drbd_io_error_p eh = EP_PASS_ON; |
1370 | |||
1371 | if (drbd_send_state(mdev)) | ||
1372 | dev_warn(DEV, "Notified peer that my disk is broken.\n"); | ||
1373 | else | ||
1374 | dev_err(DEV, "Sending state for drbd_io_error() failed\n"); | ||
1375 | |||
1376 | drbd_rs_cancel_all(mdev); | ||
1368 | 1377 | ||
1369 | eh = EP_PASS_ON; | ||
1370 | if (get_ldev_if_state(mdev, D_FAILED)) { | 1378 | if (get_ldev_if_state(mdev, D_FAILED)) { |
1371 | eh = mdev->ldev->dc.on_io_error; | 1379 | eh = mdev->ldev->dc.on_io_error; |
1372 | put_ldev(mdev); | 1380 | put_ldev(mdev); |
1373 | } | 1381 | } |
1382 | if (eh == EP_CALL_HELPER) | ||
1383 | drbd_khelper(mdev, "local-io-error"); | ||
1384 | } | ||
1374 | 1385 | ||
1375 | drbd_rs_cancel_all(mdev); | 1386 | |
1376 | /* since get_ldev() only works as long as disk>=D_INCONSISTENT, | 1387 | /* second half of local IO error handling, |
1377 | and it is D_DISKLESS here, local_cnt can only go down, it can | 1388 | * after local_cnt references have reached zero: */ |
1378 | not increase... It will reach zero */ | 1389 | if (os.disk == D_FAILED && ns.disk == D_DISKLESS) { |
1379 | wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); | ||
1380 | mdev->rs_total = 0; | 1390 | mdev->rs_total = 0; |
1381 | mdev->rs_failed = 0; | 1391 | mdev->rs_failed = 0; |
1382 | atomic_set(&mdev->rs_pending_cnt, 0); | 1392 | atomic_set(&mdev->rs_pending_cnt, 0); |
1383 | |||
1384 | spin_lock_irq(&mdev->req_lock); | ||
1385 | _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL); | ||
1386 | spin_unlock_irq(&mdev->req_lock); | ||
1387 | |||
1388 | if (eh == EP_CALL_HELPER) | ||
1389 | drbd_khelper(mdev, "local-io-error"); | ||
1390 | } | 1393 | } |
1391 | 1394 | ||
1392 | if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { | 1395 | if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { |
1396 | int c = atomic_read(&mdev->local_cnt); | ||
1393 | 1397 | ||
1394 | if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ { | 1398 | if (drbd_send_state(mdev)) |
1395 | if (drbd_send_state(mdev)) | 1399 | dev_warn(DEV, "Notified peer that I detached my disk.\n"); |
1396 | dev_warn(DEV, "Notified peer that my disk is broken.\n"); | 1400 | else |
1397 | else | 1401 | dev_err(DEV, "Sending state for detach failed\n"); |
1398 | dev_err(DEV, "Sending state in drbd_io_error() failed\n"); | ||
1399 | } | ||
1400 | 1402 | ||
1401 | wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); | 1403 | if (c != 0) { |
1404 | dev_err(DEV, "Logic bug, local_cnt=%d, but should be 0\n", c); | ||
1405 | wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); | ||
1406 | } | ||
1402 | lc_destroy(mdev->resync); | 1407 | lc_destroy(mdev->resync); |
1403 | mdev->resync = NULL; | 1408 | mdev->resync = NULL; |
1404 | lc_destroy(mdev->act_log); | 1409 | lc_destroy(mdev->act_log); |
@@ -2803,11 +2808,13 @@ void drbd_init_set_defaults(struct drbd_conf *mdev) | |||
2803 | INIT_LIST_HEAD(&mdev->meta.work.q); | 2808 | INIT_LIST_HEAD(&mdev->meta.work.q); |
2804 | INIT_LIST_HEAD(&mdev->resync_work.list); | 2809 | INIT_LIST_HEAD(&mdev->resync_work.list); |
2805 | INIT_LIST_HEAD(&mdev->unplug_work.list); | 2810 | INIT_LIST_HEAD(&mdev->unplug_work.list); |
2811 | INIT_LIST_HEAD(&mdev->go_diskless.list); | ||
2806 | INIT_LIST_HEAD(&mdev->md_sync_work.list); | 2812 | INIT_LIST_HEAD(&mdev->md_sync_work.list); |
2807 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); | 2813 | INIT_LIST_HEAD(&mdev->bm_io_work.w.list); |
2808 | 2814 | ||
2809 | mdev->resync_work.cb = w_resync_inactive; | 2815 | mdev->resync_work.cb = w_resync_inactive; |
2810 | mdev->unplug_work.cb = w_send_write_hint; | 2816 | mdev->unplug_work.cb = w_send_write_hint; |
2817 | mdev->go_diskless.cb = w_go_diskless; | ||
2811 | mdev->md_sync_work.cb = w_md_sync; | 2818 | mdev->md_sync_work.cb = w_md_sync; |
2812 | mdev->bm_io_work.w.cb = w_bitmap_io; | 2819 | mdev->bm_io_work.w.cb = w_bitmap_io; |
2813 | init_timer(&mdev->resync_timer); | 2820 | init_timer(&mdev->resync_timer); |
@@ -2885,6 +2892,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev) | |||
2885 | D_ASSERT(list_empty(&mdev->meta.work.q)); | 2892 | D_ASSERT(list_empty(&mdev->meta.work.q)); |
2886 | D_ASSERT(list_empty(&mdev->resync_work.list)); | 2893 | D_ASSERT(list_empty(&mdev->resync_work.list)); |
2887 | D_ASSERT(list_empty(&mdev->unplug_work.list)); | 2894 | D_ASSERT(list_empty(&mdev->unplug_work.list)); |
2895 | D_ASSERT(list_empty(&mdev->go_diskless.list)); | ||
2888 | 2896 | ||
2889 | } | 2897 | } |
2890 | 2898 | ||
@@ -3712,6 +3720,24 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused) | |||
3712 | return 1; | 3720 | return 1; |
3713 | } | 3721 | } |
3714 | 3722 | ||
3723 | static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused) | ||
3724 | { | ||
3725 | D_ASSERT(mdev->state.disk == D_FAILED); | ||
3726 | D_ASSERT(atomic_read(&mdev->local_cnt) == 0); | ||
3727 | |||
3728 | drbd_force_state(mdev, NS(disk, D_DISKLESS)); | ||
3729 | |||
3730 | clear_bit(GO_DISKLESS, &mdev->flags); | ||
3731 | return 1; | ||
3732 | } | ||
3733 | |||
3734 | void drbd_go_diskless(struct drbd_conf *mdev) | ||
3735 | { | ||
3736 | D_ASSERT(mdev->state.disk == D_FAILED); | ||
3737 | if (!test_and_set_bit(GO_DISKLESS, &mdev->flags)) | ||
3738 | drbd_queue_work_front(&mdev->data.work, &mdev->go_diskless); | ||
3739 | } | ||
3740 | |||
3715 | /** | 3741 | /** |
3716 | * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap | 3742 | * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap |
3717 | * @mdev: DRBD device. | 3743 | * @mdev: DRBD device. |