aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block/drbd/drbd_int.h
diff options
context:
space:
mode:
author Lars Ellenberg <lars.ellenberg@linbit.com> 2010-10-16 06:13:47 -0400
committer Philipp Reisner <philipp.reisner@linbit.com> 2010-10-22 09:46:11 -0400
commit 82f59cc6353889b426cf13b6596d5a3d100fa09e (patch)
tree 6d5a678516334f0a37a56a509b84322a0352719b /drivers/block/drbd/drbd_int.h
parent 3beec1d446fba335f07787636920892dd3b2c658 (diff)
drbd: fix potential deadlock on detach
If we have contention in drbd_al_begin_io (heavy random IO), an administrative request to detach the disk may deadlock, for reasons similar to the recently fixed deadlock when detaching because of an IO error. The approach taken here is to either go through the intermediate cleanup state D_FAILED, or first lock out application IO; don't just go directly to D_DISKLESS. We need an additional state bit (WAS_IO_ERROR) to distinguish the -> D_FAILED because of IO-error from other failures. Sanitize D_ATTACHING -> D_FAILED to D_ATTACHING -> D_DISKLESS. If only attaching, ldev may be missing still, but would be referenced from within the after_state_ch for -> D_FAILED, potentially dereferencing a NULL pointer. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd/drbd_int.h')
-rw-r--r--drivers/block/drbd/drbd_int.h20
1 files changed, 15 insertions, 5 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e0e0bf6f16a1..03c15e317c37 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -852,7 +852,8 @@ enum {
852 BITMAP_IO, /* suspend application io; 852 BITMAP_IO, /* suspend application io;
853 once no more io in flight, start bitmap io */ 853 once no more io in flight, start bitmap io */
854 BITMAP_IO_QUEUED, /* Started bitmap IO */ 854 BITMAP_IO_QUEUED, /* Started bitmap IO */
855 GO_DISKLESS, /* Disk failed, local_cnt reached zero, we are going diskless */ 855 GO_DISKLESS, /* Disk is being detached, on io-error or admin request. */
856 WAS_IO_ERROR, /* Local disk failed returned IO error */
856 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */ 857 RESYNC_AFTER_NEG, /* Resync after online grow after the attach&negotiate finished. */
857 NET_CONGESTED, /* The data socket is congested */ 858 NET_CONGESTED, /* The data socket is congested */
858 859
@@ -1281,6 +1282,7 @@ extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1281extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); 1282extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1282extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why); 1283extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
1283extern void drbd_go_diskless(struct drbd_conf *mdev); 1284extern void drbd_go_diskless(struct drbd_conf *mdev);
1285extern void drbd_ldev_destroy(struct drbd_conf *mdev);
1284 1286
1285 1287
1286/* Meta data layout 1288/* Meta data layout
@@ -1798,17 +1800,17 @@ static inline void __drbd_chk_io_error_(struct drbd_conf *mdev, int forcedetach,
1798 case EP_PASS_ON: 1800 case EP_PASS_ON:
1799 if (!forcedetach) { 1801 if (!forcedetach) {
1800 if (__ratelimit(&drbd_ratelimit_state)) 1802 if (__ratelimit(&drbd_ratelimit_state))
1801 dev_err(DEV, "Local IO failed in %s." 1803 dev_err(DEV, "Local IO failed in %s.\n", where);
1802 "Passing error on...\n", where);
1803 break; 1804 break;
1804 } 1805 }
1805 /* NOTE fall through to detach case if forcedetach set */ 1806 /* NOTE fall through to detach case if forcedetach set */
1806 case EP_DETACH: 1807 case EP_DETACH:
1807 case EP_CALL_HELPER: 1808 case EP_CALL_HELPER:
1809 set_bit(WAS_IO_ERROR, &mdev->flags);
1808 if (mdev->state.disk > D_FAILED) { 1810 if (mdev->state.disk > D_FAILED) {
1809 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL); 1811 _drbd_set_state(_NS(mdev, disk, D_FAILED), CS_HARD, NULL);
1810 dev_err(DEV, "Local IO failed in %s." 1812 dev_err(DEV,
1811 "Detaching...\n", where); 1813 "Local IO failed in %s. Detaching...\n", where);
1812 } 1814 }
1813 break; 1815 break;
1814 } 1816 }
@@ -2127,7 +2129,11 @@ static inline void put_ldev(struct drbd_conf *mdev)
2127 __release(local); 2129 __release(local);
2128 D_ASSERT(i >= 0); 2130 D_ASSERT(i >= 0);
2129 if (i == 0) { 2131 if (i == 0) {
2132 if (mdev->state.disk == D_DISKLESS)
2133 /* even internal references gone, safe to destroy */
2134 drbd_ldev_destroy(mdev);
2130 if (mdev->state.disk == D_FAILED) 2135 if (mdev->state.disk == D_FAILED)
2136 /* all application IO references gone. */
2131 drbd_go_diskless(mdev); 2137 drbd_go_diskless(mdev);
2132 wake_up(&mdev->misc_wait); 2138 wake_up(&mdev->misc_wait);
2133 } 2139 }
@@ -2138,6 +2144,10 @@ static inline int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_stat
2138{ 2144{
2139 int io_allowed; 2145 int io_allowed;
2140 2146
2147 /* never get a reference while D_DISKLESS */
2148 if (mdev->state.disk == D_DISKLESS)
2149 return 0;
2150
2141 atomic_inc(&mdev->local_cnt); 2151 atomic_inc(&mdev->local_cnt);
2142 io_allowed = (mdev->state.disk >= mins); 2152 io_allowed = (mdev->state.disk >= mins);
2143 if (!io_allowed) 2153 if (!io_allowed)