aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
author: Lars Ellenberg <lars.ellenberg@linbit.com> 2010-09-14 14:26:27 -0400
committer: Philipp Reisner <philipp.reisner@linbit.com> 2010-10-14 12:38:50 -0400
commit: e9e6f3ec535d7b7c9e2ca64ad691e743e7d3c2f0 (patch)
tree: cbc17d81b9d937b4fc515548f30f5ed00be193ee /drivers/block
parent: 22cc37a943832c948808884604ec6f5ff2594c1d (diff)
drbd: fix for possible deadlock on IO error during resync
Scenario: Something (say, flush-147:0) is in drbd_al_begin_io, holding a local_cnt, waiting for the resync to make progress. Disk fails, worker in after_state_ch does drbd_rs_cancel_all, then waits for local_cnt to drop to zero. flush-147:0 is woken by drbd_rs_cancel_all, needs to write an AL transaction, and queues that on the worker. Deadlock.

Fix: do not wait in the worker, have put_ldev() trigger the state change D_FAILED -> D_DISKLESS when necessary. put_ldev() cannot do the state change directly, as it may or may not already hold various spinlocks. We queue a short work instead.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r-- drivers/block/drbd/drbd_int.h 8
-rw-r--r-- drivers/block/drbd/drbd_main.c 68
2 files changed, 54 insertions, 22 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 8ab6fed39539..c07c370c4c82 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -852,6 +852,7 @@ enum {
852 BITMAP_IO, /* suspend application io; 852 BITMAP_IO, /* suspend application io;
853 once no more io in flight, start bitmap io */ 853 once no more io in flight, start bitmap io */
854 BITMAP_IO_QUEUED, /* Started bitmap IO */ 854 BITMAP_IO_QUEUED, /* Started bitmap IO */
855 GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */
855 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ 856 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
856 NET_CONGESTED, /* The data socket is congested */ 857 NET_CONGESTED, /* The data socket is congested */
857 858
@@ -976,6 +977,7 @@ struct drbd_conf {
976 unsigned int ko_count; 977 unsigned int ko_count;
977 struct drbd_work resync_work, 978 struct drbd_work resync_work,
978 unplug_work, 979 unplug_work,
980 go_diskless,
979 md_sync_work; 981 md_sync_work;
980 struct timer_list resync_timer; 982 struct timer_list resync_timer;
981 struct timer_list md_sync_timer; 983 struct timer_list md_sync_timer;
@@ -1278,6 +1280,7 @@ extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
1278extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); 1280extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1279extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); 1281extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1280extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); 1282extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
1283extern void drbd_go_diskless(struct drbd_conf *mdev);
1281 1284
1282 1285
1283/* Meta data layout 1286/* Meta data layout
@@ -2123,8 +2126,11 @@ static inline void put_ldev(struct drbd_conf *mdev)
2123 int i = atomic_dec_return(&mdev->local_cnt); 2126 int i = atomic_dec_return(&mdev->local_cnt);
2124 __release(local); 2127 __release(local);
2125 D_ASSERT(i >= 0); 2128 D_ASSERT(i >= 0);
2126 if (i == 0) 2129 if (i == 0) {
2130 if (mdev->state.disk == D_FAILED)
2131 drbd_go_diskless(mdev);
2127 wake_up(&mdev->misc_wait); 2132 wake_up(&mdev->misc_wait);
2133 }
2128} 2134}
2129 2135
2130#ifndef __CHECKER__ 2136#ifndef __CHECKER__
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 63f45d730f3f..f89b97466d07 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -77,6 +77,7 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
77static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused); 77static int w_md_sync(struct drbd_conf *mdev, struct drbd_work *w, int unused);
78static void md_sync_timer_fn(unsigned long data); 78static void md_sync_timer_fn(unsigned long data);
79static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused); 79static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused);
80static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused);
80 81
81MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, " 82MODULE_AUTHOR("Philipp Reisner <phil@linbit.com>, "
82 "Lars Ellenberg <lars@linbit.com>"); 83 "Lars Ellenberg <lars@linbit.com>");
@@ -1363,42 +1364,46 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1363 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) 1364 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1364 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); 1365 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate");
1365 1366
1367 /* first half of local IO error */
1366 if (os.disk > D_FAILED && ns.disk == D_FAILED) { 1368 if (os.disk > D_FAILED && ns.disk == D_FAILED) {
1367 enum drbd_io_error_p eh; 1369 enum drbd_io_error_p eh = EP_PASS_ON;
1370
1371 if (drbd_send_state(mdev))
1372 dev_warn(DEV, "Notified peer that my disk is broken.\n");
1373 else
1374 dev_err(DEV, "Sending state for drbd_io_error() failed\n");
1375
1376 drbd_rs_cancel_all(mdev);
1368 1377
1369 eh = EP_PASS_ON;
1370 if (get_ldev_if_state(mdev, D_FAILED)) { 1378 if (get_ldev_if_state(mdev, D_FAILED)) {
1371 eh = mdev->ldev->dc.on_io_error; 1379 eh = mdev->ldev->dc.on_io_error;
1372 put_ldev(mdev); 1380 put_ldev(mdev);
1373 } 1381 }
1382 if (eh == EP_CALL_HELPER)
1383 drbd_khelper(mdev, "local-io-error");
1384 }
1374 1385
1375 drbd_rs_cancel_all(mdev); 1386
1376 /* since get_ldev() only works as long as disk>=D_INCONSISTENT, 1387 /* second half of local IO error handling,
1377 and it is D_DISKLESS here, local_cnt can only go down, it can 1388 * after local_cnt references have reached zero: */
1378 not increase... It will reach zero */ 1389 if (os.disk == D_FAILED && ns.disk == D_DISKLESS) {
1379 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1380 mdev->rs_total = 0; 1390 mdev->rs_total = 0;
1381 mdev->rs_failed = 0; 1391 mdev->rs_failed = 0;
1382 atomic_set(&mdev->rs_pending_cnt, 0); 1392 atomic_set(&mdev->rs_pending_cnt, 0);
1383
1384 spin_lock_irq(&mdev->req_lock);
1385 _drbd_set_state(_NS(mdev, disk, D_DISKLESS), CS_HARD, NULL);
1386 spin_unlock_irq(&mdev->req_lock);
1387
1388 if (eh == EP_CALL_HELPER)
1389 drbd_khelper(mdev, "local-io-error");
1390 } 1393 }
1391 1394
1392 if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) { 1395 if (os.disk > D_DISKLESS && ns.disk == D_DISKLESS) {
1396 int c = atomic_read(&mdev->local_cnt);
1393 1397
1394 if (os.disk == D_FAILED) /* && ns.disk == D_DISKLESS*/ { 1398 if (drbd_send_state(mdev))
1395 if (drbd_send_state(mdev)) 1399 dev_warn(DEV, "Notified peer that I detached my disk.\n");
1396 dev_warn(DEV, "Notified peer that my disk is broken.\n"); 1400 else
1397 else 1401 dev_err(DEV, "Sending state for detach failed\n");
1398 dev_err(DEV, "Sending state in drbd_io_error() failed\n");
1399 }
1400 1402
1401 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt)); 1403 if (c != 0) {
1404 dev_err(DEV, "Logic bug, local_cnt=%d, but should be 0\n", c);
1405 wait_event(mdev->misc_wait, !atomic_read(&mdev->local_cnt));
1406 }
1402 lc_destroy(mdev->resync); 1407 lc_destroy(mdev->resync);
1403 mdev->resync = NULL; 1408 mdev->resync = NULL;
1404 lc_destroy(mdev->act_log); 1409 lc_destroy(mdev->act_log);
@@ -2803,11 +2808,13 @@ void drbd_init_set_defaults(struct drbd_conf *mdev)
2803 INIT_LIST_HEAD(&mdev->meta.work.q); 2808 INIT_LIST_HEAD(&mdev->meta.work.q);
2804 INIT_LIST_HEAD(&mdev->resync_work.list); 2809 INIT_LIST_HEAD(&mdev->resync_work.list);
2805 INIT_LIST_HEAD(&mdev->unplug_work.list); 2810 INIT_LIST_HEAD(&mdev->unplug_work.list);
2811 INIT_LIST_HEAD(&mdev->go_diskless.list);
2806 INIT_LIST_HEAD(&mdev->md_sync_work.list); 2812 INIT_LIST_HEAD(&mdev->md_sync_work.list);
2807 INIT_LIST_HEAD(&mdev->bm_io_work.w.list); 2813 INIT_LIST_HEAD(&mdev->bm_io_work.w.list);
2808 2814
2809 mdev->resync_work.cb = w_resync_inactive; 2815 mdev->resync_work.cb = w_resync_inactive;
2810 mdev->unplug_work.cb = w_send_write_hint; 2816 mdev->unplug_work.cb = w_send_write_hint;
2817 mdev->go_diskless.cb = w_go_diskless;
2811 mdev->md_sync_work.cb = w_md_sync; 2818 mdev->md_sync_work.cb = w_md_sync;
2812 mdev->bm_io_work.w.cb = w_bitmap_io; 2819 mdev->bm_io_work.w.cb = w_bitmap_io;
2813 init_timer(&mdev->resync_timer); 2820 init_timer(&mdev->resync_timer);
@@ -2885,6 +2892,7 @@ void drbd_mdev_cleanup(struct drbd_conf *mdev)
2885 D_ASSERT(list_empty(&mdev->meta.work.q)); 2892 D_ASSERT(list_empty(&mdev->meta.work.q));
2886 D_ASSERT(list_empty(&mdev->resync_work.list)); 2893 D_ASSERT(list_empty(&mdev->resync_work.list));
2887 D_ASSERT(list_empty(&mdev->unplug_work.list)); 2894 D_ASSERT(list_empty(&mdev->unplug_work.list));
2895 D_ASSERT(list_empty(&mdev->go_diskless.list));
2888 2896
2889} 2897}
2890 2898
@@ -3712,6 +3720,24 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3712 return 1; 3720 return 1;
3713} 3721}
3714 3722
/*
 * w_go_diskless() - work callback installed as mdev->go_diskless.cb.
 *
 * Queued by drbd_go_diskless(). Per the commit message, this performs the
 * D_FAILED -> D_DISKLESS state change from a queued work item, because
 * put_ldev() may already hold spinlocks and cannot do it directly.
 *
 * @mdev:   DRBD device whose disk state is changed.
 * @w:      the work item; not referenced in the body.
 * @unused: not referenced in the body.
 *
 * Always returns 1.
 */
3723static int w_go_diskless(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3724{
/* Expected entry state: the disk has failed and no local_cnt references remain. */
3725 D_ASSERT(mdev->state.disk == D_FAILED);
3726 D_ASSERT(atomic_read(&mdev->local_cnt) == 0);
3727
3728 drbd_force_state(mdev, NS(disk, D_DISKLESS));
3729
/* Re-arm drbd_go_diskless(), which only queues this work while the flag is clear. */
3730 clear_bit(GO_DISKLESS, &mdev->flags);
3731 return 1;
3732}
3733
/*
 * drbd_go_diskless() - schedule the D_FAILED -> D_DISKLESS transition.
 *
 * Called from put_ldev() when local_cnt drops to zero while the disk state
 * is D_FAILED. It does not change the state itself; it queues the
 * go_diskless work item (callback w_go_diskless) at the front of
 * mdev->data.work.
 *
 * @mdev: DRBD device whose disk is expected to be in D_FAILED.
 */
3734void drbd_go_diskless(struct drbd_conf *mdev)
3735{
3736 D_ASSERT(mdev->state.disk == D_FAILED);
/* GO_DISKLESS gates the queueing: only the caller that sets the bit queues the work. */
3737 if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
3738 drbd_queue_work_front(&mdev->data.work, &mdev->go_diskless);
3739}
3740
3715/** 3741/**
3716 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap 3742 * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
3717 * @mdev: DRBD device. 3743 * @mdev: DRBD device.