aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorLars Ellenberg <lars.ellenberg@linbit.com>2011-01-21 04:56:44 -0500
committerPhilipp Reisner <philipp.reisner@linbit.com>2011-03-10 05:48:02 -0500
commit20ceb2b22edaf51e59e76087efdc71a16a2858de (patch)
treea4f267242725bac2a915e879a6b6ac259218c5fa /drivers/block
parent62b0da3a244ac33d25a77861ef1cc0080103f2ff (diff)
drbd: describe bitmap locking for bulk operation in finer detail
Now that we no longer endian-swap the bitmap in place, we allow selected bitmap operations (testing bits, sometimes even setting bits) during some bulk operations. This caused us to hit a lot of FIXME asserts similar to: "FIXME asender in drbd_bm_count_bits, bitmap locked for 'write from resync_finished' by worker". That is now nonsense: looking at the bitmap is perfectly legal as long as it is not being resized. This cosmetic patch defines some flags to describe expectations in finer detail, so the asserts in e.g. bm_change_bits_to() can be skipped if appropriate. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/drbd/drbd_bitmap.c48
-rw-r--r--drivers/block/drbd/drbd_int.h36
-rw-r--r--drivers/block/drbd/drbd_main.c58
-rw-r--r--drivers/block/drbd/drbd_nl.c19
-rw-r--r--drivers/block/drbd/drbd_receiver.c17
5 files changed, 115 insertions, 63 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 25428bc28476..b62dd5f26c5d 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -104,26 +104,16 @@ struct drbd_bitmap {
104 104
105 wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */ 105 wait_queue_head_t bm_io_wait; /* used to serialize IO of single pages */
106 106
107 unsigned long bm_flags; 107 enum bm_flag bm_flags;
108 108
109 /* debugging aid, in case we are still racy somewhere */ 109 /* debugging aid, in case we are still racy somewhere */
110 char *bm_why; 110 char *bm_why;
111 struct task_struct *bm_task; 111 struct task_struct *bm_task;
112}; 112};
113 113
114/* definition of bits in bm_flags */
115#define BM_LOCKED 0
116// #define BM_MD_IO_ERROR 1 unused now.
117#define BM_P_VMALLOCED 2
118
119static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, 114static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
120 unsigned long e, int val, const enum km_type km); 115 unsigned long e, int val, const enum km_type km);
121 116
122static int bm_is_locked(struct drbd_bitmap *b)
123{
124 return test_bit(BM_LOCKED, &b->bm_flags);
125}
126
127#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__) 117#define bm_print_lock_info(m) __bm_print_lock_info(m, __func__)
128static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func) 118static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
129{ 119{
@@ -140,7 +130,7 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
140 b->bm_task == mdev->worker.task ? "worker" : "?"); 130 b->bm_task == mdev->worker.task ? "worker" : "?");
141} 131}
142 132
143void drbd_bm_lock(struct drbd_conf *mdev, char *why) 133void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
144{ 134{
145 struct drbd_bitmap *b = mdev->bitmap; 135 struct drbd_bitmap *b = mdev->bitmap;
146 int trylock_failed; 136 int trylock_failed;
@@ -163,8 +153,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why)
163 b->bm_task == mdev->worker.task ? "worker" : "?"); 153 b->bm_task == mdev->worker.task ? "worker" : "?");
164 mutex_lock(&b->bm_change); 154 mutex_lock(&b->bm_change);
165 } 155 }
166 if (__test_and_set_bit(BM_LOCKED, &b->bm_flags)) 156 if (BM_LOCKED_MASK & b->bm_flags)
167 dev_err(DEV, "FIXME bitmap already locked in bm_lock\n"); 157 dev_err(DEV, "FIXME bitmap already locked in bm_lock\n");
158 b->bm_flags |= flags & BM_LOCKED_MASK;
168 159
169 b->bm_why = why; 160 b->bm_why = why;
170 b->bm_task = current; 161 b->bm_task = current;
@@ -178,9 +169,10 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
178 return; 169 return;
179 } 170 }
180 171
181 if (!__test_and_clear_bit(BM_LOCKED, &mdev->bitmap->bm_flags)) 172 if (!(BM_LOCKED_MASK & mdev->bitmap->bm_flags))
182 dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n"); 173 dev_err(DEV, "FIXME bitmap not locked in bm_unlock\n");
183 174
175 b->bm_flags &= ~BM_LOCKED_MASK;
184 b->bm_why = NULL; 176 b->bm_why = NULL;
185 b->bm_task = NULL; 177 b->bm_task = NULL;
186 mutex_unlock(&b->bm_change); 178 mutex_unlock(&b->bm_change);
@@ -421,9 +413,9 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
421 } 413 }
422 414
423 if (vmalloced) 415 if (vmalloced)
424 set_bit(BM_P_VMALLOCED, &b->bm_flags); 416 b->bm_flags |= BM_P_VMALLOCED;
425 else 417 else
426 clear_bit(BM_P_VMALLOCED, &b->bm_flags); 418 b->bm_flags &= ~BM_P_VMALLOCED;
427 419
428 return new_pages; 420 return new_pages;
429} 421}
@@ -460,7 +452,7 @@ void drbd_bm_cleanup(struct drbd_conf *mdev)
460{ 452{
461 ERR_IF (!mdev->bitmap) return; 453 ERR_IF (!mdev->bitmap) return;
462 bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages); 454 bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
463 bm_vk_free(mdev->bitmap->bm_pages, test_bit(BM_P_VMALLOCED, &mdev->bitmap->bm_flags)); 455 bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
464 kfree(mdev->bitmap); 456 kfree(mdev->bitmap);
465 mdev->bitmap = NULL; 457 mdev->bitmap = NULL;
466} 458}
@@ -623,7 +615,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
623 615
624 ERR_IF(!b) return -ENOMEM; 616 ERR_IF(!b) return -ENOMEM;
625 617
626 drbd_bm_lock(mdev, "resize"); 618 drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);
627 619
628 dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n", 620 dev_info(DEV, "drbd_bm_resize called with capacity == %llu\n",
629 (unsigned long long)capacity); 621 (unsigned long long)capacity);
@@ -631,7 +623,7 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
631 if (capacity == b->bm_dev_capacity) 623 if (capacity == b->bm_dev_capacity)
632 goto out; 624 goto out;
633 625
634 opages_vmalloced = test_bit(BM_P_VMALLOCED, &b->bm_flags); 626 opages_vmalloced = (BM_P_VMALLOCED & b->bm_flags);
635 627
636 if (capacity == 0) { 628 if (capacity == 0) {
637 spin_lock_irq(&b->bm_lock); 629 spin_lock_irq(&b->bm_lock);
@@ -1030,7 +1022,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned lazy_writeout_upper_id
1030 * as we submit copies of pages anyways. 1022 * as we submit copies of pages anyways.
1031 */ 1023 */
1032 if (!ctx.flags) 1024 if (!ctx.flags)
1033 WARN_ON(!bm_is_locked(b)); 1025 WARN_ON(!(BM_LOCKED_MASK & b->bm_flags));
1034 1026
1035 num_pages = b->bm_number_of_pages; 1027 num_pages = b->bm_number_of_pages;
1036 1028
@@ -1220,7 +1212,7 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
1220 ERR_IF(!b->bm_pages) return i; 1212 ERR_IF(!b->bm_pages) return i;
1221 1213
1222 spin_lock_irq(&b->bm_lock); 1214 spin_lock_irq(&b->bm_lock);
1223 if (bm_is_locked(b)) 1215 if (BM_DONT_TEST & b->bm_flags)
1224 bm_print_lock_info(mdev); 1216 bm_print_lock_info(mdev);
1225 1217
1226 i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1); 1218 i = __bm_find_next(mdev, bm_fo, find_zero_bit, KM_IRQ1);
@@ -1246,13 +1238,13 @@ unsigned long drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo
1246 * you must take drbd_bm_lock() first */ 1238 * you must take drbd_bm_lock() first */
1247unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo) 1239unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo)
1248{ 1240{
1249 /* WARN_ON(!bm_is_locked(mdev)); */ 1241 /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
1250 return __bm_find_next(mdev, bm_fo, 0, KM_USER1); 1242 return __bm_find_next(mdev, bm_fo, 0, KM_USER1);
1251} 1243}
1252 1244
1253unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo) 1245unsigned long _drbd_bm_find_next_zero(struct drbd_conf *mdev, unsigned long bm_fo)
1254{ 1246{
1255 /* WARN_ON(!bm_is_locked(mdev)); */ 1247 /* WARN_ON(!(BM_DONT_SET & mdev->b->bm_flags)); */
1256 return __bm_find_next(mdev, bm_fo, 1, KM_USER1); 1248 return __bm_find_next(mdev, bm_fo, 1, KM_USER1);
1257} 1249}
1258 1250
@@ -1322,7 +1314,7 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1322 ERR_IF(!b->bm_pages) return 0; 1314 ERR_IF(!b->bm_pages) return 0;
1323 1315
1324 spin_lock_irqsave(&b->bm_lock, flags); 1316 spin_lock_irqsave(&b->bm_lock, flags);
1325 if (bm_is_locked(b)) 1317 if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
1326 bm_print_lock_info(mdev); 1318 bm_print_lock_info(mdev);
1327 1319
1328 c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1); 1320 c = __bm_change_bits_to(mdev, s, e, val, KM_IRQ1);
@@ -1439,7 +1431,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
1439 ERR_IF(!b->bm_pages) return 0; 1431 ERR_IF(!b->bm_pages) return 0;
1440 1432
1441 spin_lock_irqsave(&b->bm_lock, flags); 1433 spin_lock_irqsave(&b->bm_lock, flags);
1442 if (bm_is_locked(b)) 1434 if (BM_DONT_TEST & b->bm_flags)
1443 bm_print_lock_info(mdev); 1435 bm_print_lock_info(mdev);
1444 if (bitnr < b->bm_bits) { 1436 if (bitnr < b->bm_bits) {
1445 p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); 1437 p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
@@ -1474,7 +1466,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
1474 ERR_IF(!b->bm_pages) return 1; 1466 ERR_IF(!b->bm_pages) return 1;
1475 1467
1476 spin_lock_irqsave(&b->bm_lock, flags); 1468 spin_lock_irqsave(&b->bm_lock, flags);
1477 if (bm_is_locked(b)) 1469 if (BM_DONT_TEST & b->bm_flags)
1478 bm_print_lock_info(mdev); 1470 bm_print_lock_info(mdev);
1479 for (bitnr = s; bitnr <= e; bitnr++) { 1471 for (bitnr = s; bitnr <= e; bitnr++) {
1480 unsigned int idx = bm_bit_to_page_idx(b, bitnr); 1472 unsigned int idx = bm_bit_to_page_idx(b, bitnr);
@@ -1522,7 +1514,7 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
1522 ERR_IF(!b->bm_pages) return 0; 1514 ERR_IF(!b->bm_pages) return 0;
1523 1515
1524 spin_lock_irqsave(&b->bm_lock, flags); 1516 spin_lock_irqsave(&b->bm_lock, flags);
1525 if (bm_is_locked(b)) 1517 if (BM_DONT_TEST & b->bm_flags)
1526 bm_print_lock_info(mdev); 1518 bm_print_lock_info(mdev);
1527 1519
1528 s = S2W(enr); 1520 s = S2W(enr);
@@ -1555,7 +1547,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
1555 ERR_IF(!b->bm_pages) return 0; 1547 ERR_IF(!b->bm_pages) return 0;
1556 1548
1557 spin_lock_irq(&b->bm_lock); 1549 spin_lock_irq(&b->bm_lock);
1558 if (bm_is_locked(b)) 1550 if (BM_DONT_SET & b->bm_flags)
1559 bm_print_lock_info(mdev); 1551 bm_print_lock_info(mdev);
1560 weight = b->bm_set; 1552 weight = b->bm_set;
1561 1553
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 0a9059eb94db..267d9897ca8c 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -855,6 +855,32 @@ enum {
855 855
856struct drbd_bitmap; /* opaque for drbd_conf */ 856struct drbd_bitmap; /* opaque for drbd_conf */
857 857
858/* definition of bits in bm_flags to be used in drbd_bm_lock
859 * and drbd_bitmap_io and friends. */
860enum bm_flag {
861 /* do we need to kfree, or vfree bm_pages? */
862 BM_P_VMALLOCED = 0x10000, /* internal use only, will be masked out */
863
864 /* currently locked for bulk operation */
865 BM_LOCKED_MASK = 0x7,
866
867 /* in detail, that is: */
868 BM_DONT_CLEAR = 0x1,
869 BM_DONT_SET = 0x2,
870 BM_DONT_TEST = 0x4,
871
872 /* (test bit, count bit) allowed (common case) */
873 BM_LOCKED_TEST_ALLOWED = 0x3,
874
875 /* testing bits, as well as setting new bits allowed, but clearing bits
876 * would be unexpected. Used during bitmap receive. Setting new bits
877 * requires sending of "out-of-sync" information, though. */
878 BM_LOCKED_SET_ALLOWED = 0x1,
879
880 /* clear is not expected while bitmap is locked for bulk operation */
881};
882
883
858/* TODO sort members for performance 884/* TODO sort members for performance
859 * MAYBE group them further */ 885 * MAYBE group them further */
860 886
@@ -920,6 +946,7 @@ struct drbd_md_io {
920struct bm_io_work { 946struct bm_io_work {
921 struct drbd_work w; 947 struct drbd_work w;
922 char *why; 948 char *why;
949 enum bm_flag flags;
923 int (*io_fn)(struct drbd_conf *mdev); 950 int (*io_fn)(struct drbd_conf *mdev);
924 void (*done)(struct drbd_conf *mdev, int rv); 951 void (*done)(struct drbd_conf *mdev, int rv);
925}; 952};
@@ -1242,7 +1269,6 @@ extern void drbd_free_bc(struct drbd_backing_dev *ldev);
1242extern void drbd_mdev_cleanup(struct drbd_conf *mdev); 1269extern void drbd_mdev_cleanup(struct drbd_conf *mdev);
1243void drbd_print_uuids(struct drbd_conf *mdev, const char *text); 1270void drbd_print_uuids(struct drbd_conf *mdev, const char *text);
1244 1271
1245/* drbd_meta-data.c (still in drbd_main.c) */
1246extern void drbd_md_sync(struct drbd_conf *mdev); 1272extern void drbd_md_sync(struct drbd_conf *mdev);
1247extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev); 1273extern int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev);
1248extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local); 1274extern void drbd_uuid_set(struct drbd_conf *mdev, int idx, u64 val) __must_hold(local);
@@ -1263,10 +1289,12 @@ extern void drbd_md_mark_dirty_(struct drbd_conf *mdev,
1263extern void drbd_queue_bitmap_io(struct drbd_conf *mdev, 1289extern void drbd_queue_bitmap_io(struct drbd_conf *mdev,
1264 int (*io_fn)(struct drbd_conf *), 1290 int (*io_fn)(struct drbd_conf *),
1265 void (*done)(struct drbd_conf *, int), 1291 void (*done)(struct drbd_conf *, int),
1266 char *why); 1292 char *why, enum bm_flag flags);
1293extern int drbd_bitmap_io(struct drbd_conf *mdev,
1294 int (*io_fn)(struct drbd_conf *),
1295 char *why, enum bm_flag flags);
1267extern int drbd_bmio_set_n_write(struct drbd_conf *mdev); 1296extern int drbd_bmio_set_n_write(struct drbd_conf *mdev);
1268extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev); 1297extern int drbd_bmio_clear_n_write(struct drbd_conf *mdev);
1269extern int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why);
1270extern void drbd_go_diskless(struct drbd_conf *mdev); 1298extern void drbd_go_diskless(struct drbd_conf *mdev);
1271extern void drbd_ldev_destroy(struct drbd_conf *mdev); 1299extern void drbd_ldev_destroy(struct drbd_conf *mdev);
1272 1300
@@ -1452,7 +1480,7 @@ extern void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset,
1452extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, 1480extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
1453 size_t number, unsigned long *buffer); 1481 size_t number, unsigned long *buffer);
1454 1482
1455extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); 1483extern void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags);
1456extern void drbd_bm_unlock(struct drbd_conf *mdev); 1484extern void drbd_bm_unlock(struct drbd_conf *mdev);
1457/* drbd_main.c */ 1485/* drbd_main.c */
1458 1486
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index b68332a0e73e..a9e9b496e73b 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -1320,7 +1320,9 @@ static void abw_start_sync(struct drbd_conf *mdev, int rv)
1320 } 1320 }
1321} 1321}
1322 1322
1323int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) 1323int drbd_bitmap_io_from_worker(struct drbd_conf *mdev,
1324 int (*io_fn)(struct drbd_conf *),
1325 char *why, enum bm_flag flags)
1324{ 1326{
1325 int rv; 1327 int rv;
1326 1328
@@ -1328,10 +1330,8 @@ int drbd_bitmap_io_from_worker(struct drbd_conf *mdev, int (*io_fn)(struct drbd_
1328 1330
1329 /* open coded non-blocking drbd_suspend_io(mdev); */ 1331 /* open coded non-blocking drbd_suspend_io(mdev); */
1330 set_bit(SUSPEND_IO, &mdev->flags); 1332 set_bit(SUSPEND_IO, &mdev->flags);
1331 if (!is_susp(mdev->state))
1332 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
1333 1333
1334 drbd_bm_lock(mdev, why); 1334 drbd_bm_lock(mdev, why, flags);
1335 rv = io_fn(mdev); 1335 rv = io_fn(mdev);
1336 drbd_bm_unlock(mdev); 1336 drbd_bm_unlock(mdev);
1337 1337
@@ -1438,7 +1438,8 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1438 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S && 1438 if (os.conn != C_WF_BITMAP_S && ns.conn == C_WF_BITMAP_S &&
1439 mdev->state.conn == C_WF_BITMAP_S) 1439 mdev->state.conn == C_WF_BITMAP_S)
1440 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL, 1440 drbd_queue_bitmap_io(mdev, &drbd_send_bitmap, NULL,
1441 "send_bitmap (WFBitMapS)"); 1441 "send_bitmap (WFBitMapS)",
1442 BM_LOCKED_TEST_ALLOWED);
1442 1443
1443 /* Lost contact to peer's copy of the data */ 1444 /* Lost contact to peer's copy of the data */
1444 if ((os.pdsk >= D_INCONSISTENT && 1445 if ((os.pdsk >= D_INCONSISTENT &&
@@ -1469,7 +1470,11 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1469 1470
1470 /* D_DISKLESS Peer becomes secondary */ 1471 /* D_DISKLESS Peer becomes secondary */
1471 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY) 1472 if (os.peer == R_PRIMARY && ns.peer == R_SECONDARY)
1472 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote diskless peer"); 1473 /* We may still be Primary ourselves.
1474 * No harm done if the bitmap still changes,
1475 * redirtied pages will follow later. */
1476 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1477 "demote diskless peer", BM_LOCKED_SET_ALLOWED);
1473 put_ldev(mdev); 1478 put_ldev(mdev);
1474 } 1479 }
1475 1480
@@ -1478,7 +1483,10 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1478 * if there is a resync going on still */ 1483 * if there is a resync going on still */
1479 if (os.role == R_PRIMARY && ns.role == R_SECONDARY && 1484 if (os.role == R_PRIMARY && ns.role == R_SECONDARY &&
1480 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) { 1485 mdev->state.conn <= C_CONNECTED && get_ldev(mdev)) {
1481 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write, "demote"); 1486 /* No changes to the bitmap expected this time, so assert that,
1487 * even though no harm was done if it did change. */
1488 drbd_bitmap_io_from_worker(mdev, &drbd_bm_write,
1489 "demote", BM_LOCKED_TEST_ALLOWED);
1482 put_ldev(mdev); 1490 put_ldev(mdev);
1483 } 1491 }
1484 1492
@@ -1512,12 +1520,17 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1512 /* We are in the progress to start a full sync... */ 1520 /* We are in the progress to start a full sync... */
1513 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) || 1521 if ((os.conn != C_STARTING_SYNC_T && ns.conn == C_STARTING_SYNC_T) ||
1514 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S)) 1522 (os.conn != C_STARTING_SYNC_S && ns.conn == C_STARTING_SYNC_S))
1515 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, &abw_start_sync, "set_n_write from StartingSync"); 1523 /* no other bitmap changes expected during this phase */
1524 drbd_queue_bitmap_io(mdev,
1525 &drbd_bmio_set_n_write, &abw_start_sync,
1526 "set_n_write from StartingSync", BM_LOCKED_TEST_ALLOWED);
1516 1527
1517 /* We are invalidating our self... */ 1528 /* We are invalidating our self... */
1518 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED && 1529 if (os.conn < C_CONNECTED && ns.conn < C_CONNECTED &&
1519 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT) 1530 os.disk > D_INCONSISTENT && ns.disk == D_INCONSISTENT)
1520 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL, "set_n_write from invalidate"); 1531 /* other bitmap operation expected during this phase */
1532 drbd_queue_bitmap_io(mdev, &drbd_bmio_set_n_write, NULL,
1533 "set_n_write from invalidate", BM_LOCKED_MASK);
1521 1534
1522 /* first half of local IO error, failure to attach, 1535 /* first half of local IO error, failure to attach,
1523 * or administrative detach */ 1536 * or administrative detach */
@@ -1599,14 +1612,14 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
1599 1612
1600 /* This triggers bitmap writeout of potentially still unwritten pages 1613 /* This triggers bitmap writeout of potentially still unwritten pages
1601 * if the resync finished cleanly, or aborted because of peer disk 1614 * if the resync finished cleanly, or aborted because of peer disk
1602 * failure. Resync aborted because of connection failure does bitmap 1615 * failure, or because of connection loss.
1603 * writeout from drbd_disconnect.
1604 * For resync aborted because of local disk failure, we cannot do 1616 * For resync aborted because of local disk failure, we cannot do
1605 * any bitmap writeout anymore. 1617 * any bitmap writeout anymore.
1618 * No harm done if some bits change during this phase.
1606 */ 1619 */
1607 if (os.conn > C_CONNECTED && ns.conn == C_CONNECTED && 1620 if (os.conn > C_CONNECTED && ns.conn <= C_CONNECTED && get_ldev(mdev)) {
1608 mdev->state.conn == C_CONNECTED && get_ldev(mdev)) { 1621 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL,
1609 drbd_queue_bitmap_io(mdev, &drbd_bm_write, NULL, "write from resync_finished"); 1622 "write from resync_finished", BM_LOCKED_SET_ALLOWED);
1610 put_ldev(mdev); 1623 put_ldev(mdev);
1611 } 1624 }
1612 1625
@@ -3929,7 +3942,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3929 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0); 3942 D_ASSERT(atomic_read(&mdev->ap_bio_cnt) == 0);
3930 3943
3931 if (get_ldev(mdev)) { 3944 if (get_ldev(mdev)) {
3932 drbd_bm_lock(mdev, work->why); 3945 drbd_bm_lock(mdev, work->why, work->flags);
3933 rv = work->io_fn(mdev); 3946 rv = work->io_fn(mdev);
3934 drbd_bm_unlock(mdev); 3947 drbd_bm_unlock(mdev);
3935 put_ldev(mdev); 3948 put_ldev(mdev);
@@ -3944,6 +3957,7 @@ static int w_bitmap_io(struct drbd_conf *mdev, struct drbd_work *w, int unused)
3944 3957
3945 clear_bit(BITMAP_IO_QUEUED, &mdev->flags); 3958 clear_bit(BITMAP_IO_QUEUED, &mdev->flags);
3946 work->why = NULL; 3959 work->why = NULL;
3960 work->flags = 0;
3947 3961
3948 return 1; 3962 return 1;
3949} 3963}
@@ -3998,7 +4012,7 @@ void drbd_go_diskless(struct drbd_conf *mdev)
3998void drbd_queue_bitmap_io(struct drbd_conf *mdev, 4012void drbd_queue_bitmap_io(struct drbd_conf *mdev,
3999 int (*io_fn)(struct drbd_conf *), 4013 int (*io_fn)(struct drbd_conf *),
4000 void (*done)(struct drbd_conf *, int), 4014 void (*done)(struct drbd_conf *, int),
4001 char *why) 4015 char *why, enum bm_flag flags)
4002{ 4016{
4003 D_ASSERT(current == mdev->worker.task); 4017 D_ASSERT(current == mdev->worker.task);
4004 4018
@@ -4012,6 +4026,7 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
4012 mdev->bm_io_work.io_fn = io_fn; 4026 mdev->bm_io_work.io_fn = io_fn;
4013 mdev->bm_io_work.done = done; 4027 mdev->bm_io_work.done = done;
4014 mdev->bm_io_work.why = why; 4028 mdev->bm_io_work.why = why;
4029 mdev->bm_io_work.flags = flags;
4015 4030
4016 spin_lock_irq(&mdev->req_lock); 4031 spin_lock_irq(&mdev->req_lock);
4017 set_bit(BITMAP_IO, &mdev->flags); 4032 set_bit(BITMAP_IO, &mdev->flags);
@@ -4031,19 +4046,22 @@ void drbd_queue_bitmap_io(struct drbd_conf *mdev,
4031 * freezes application IO while that the actual IO operations runs. This 4046 * freezes application IO while that the actual IO operations runs. This
4032 * functions MAY NOT be called from worker context. 4047 * functions MAY NOT be called from worker context.
4033 */ 4048 */
4034int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *), char *why) 4049int drbd_bitmap_io(struct drbd_conf *mdev, int (*io_fn)(struct drbd_conf *),
4050 char *why, enum bm_flag flags)
4035{ 4051{
4036 int rv; 4052 int rv;
4037 4053
4038 D_ASSERT(current != mdev->worker.task); 4054 D_ASSERT(current != mdev->worker.task);
4039 4055
4040 drbd_suspend_io(mdev); 4056 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4057 drbd_suspend_io(mdev);
4041 4058
4042 drbd_bm_lock(mdev, why); 4059 drbd_bm_lock(mdev, why, flags);
4043 rv = io_fn(mdev); 4060 rv = io_fn(mdev);
4044 drbd_bm_unlock(mdev); 4061 drbd_bm_unlock(mdev);
4045 4062
4046 drbd_resume_io(mdev); 4063 if ((flags & BM_LOCKED_SET_ALLOWED) == 0)
4064 drbd_resume_io(mdev);
4047 4065
4048 return rv; 4066 return rv;
4049} 4067}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index ce6f2fe80852..bc0bcb964603 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -648,7 +648,9 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, enum dds_
648 dev_info(DEV, "Writing the whole bitmap, %s\n", 648 dev_info(DEV, "Writing the whole bitmap, %s\n",
649 la_size_changed && md_moved ? "size changed and md moved" : 649 la_size_changed && md_moved ? "size changed and md moved" :
650 la_size_changed ? "size changed" : "md moved"); 650 la_size_changed ? "size changed" : "md moved");
651 err = drbd_bitmap_io(mdev, &drbd_bm_write, "size changed"); /* does drbd_resume_io() ! */ 651 /* next line implicitly does drbd_suspend_io()+drbd_resume_io() */
652 err = drbd_bitmap_io(mdev, &drbd_bm_write,
653 "size changed", BM_LOCKED_MASK);
652 if (err) { 654 if (err) {
653 rv = dev_size_error; 655 rv = dev_size_error;
654 goto out; 656 goto out;
@@ -1160,12 +1162,14 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1160 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) { 1162 if (drbd_md_test_flag(mdev->ldev, MDF_FULL_SYNC)) {
1161 dev_info(DEV, "Assuming that all blocks are out of sync " 1163 dev_info(DEV, "Assuming that all blocks are out of sync "
1162 "(aka FullSync)\n"); 1164 "(aka FullSync)\n");
1163 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from attaching")) { 1165 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write,
1166 "set_n_write from attaching", BM_LOCKED_MASK)) {
1164 retcode = ERR_IO_MD_DISK; 1167 retcode = ERR_IO_MD_DISK;
1165 goto force_diskless_dec; 1168 goto force_diskless_dec;
1166 } 1169 }
1167 } else { 1170 } else {
1168 if (drbd_bitmap_io(mdev, &drbd_bm_read, "read from attaching") < 0) { 1171 if (drbd_bitmap_io(mdev, &drbd_bm_read,
1172 "read from attaching", BM_LOCKED_MASK) < 0) {
1169 retcode = ERR_IO_MD_DISK; 1173 retcode = ERR_IO_MD_DISK;
1170 goto force_diskless_dec; 1174 goto force_diskless_dec;
1171 } 1175 }
@@ -1173,7 +1177,8 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp
1173 1177
1174 if (cp_discovered) { 1178 if (cp_discovered) {
1175 drbd_al_apply_to_bm(mdev); 1179 drbd_al_apply_to_bm(mdev);
1176 if (drbd_bitmap_io(mdev, &drbd_bm_write, "crashed primary apply AL")) { 1180 if (drbd_bitmap_io(mdev, &drbd_bm_write,
1181 "crashed primary apply AL", BM_LOCKED_MASK)) {
1177 retcode = ERR_IO_MD_DISK; 1182 retcode = ERR_IO_MD_DISK;
1178 goto force_diskless_dec; 1183 goto force_diskless_dec;
1179 } 1184 }
@@ -1925,7 +1930,8 @@ static int drbd_nl_invalidate_peer(struct drbd_conf *mdev, struct drbd_nl_cfg_re
1925 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT)); 1930 retcode = drbd_request_state(mdev, NS(pdsk, D_INCONSISTENT));
1926 if (retcode >= SS_SUCCESS) { 1931 if (retcode >= SS_SUCCESS) {
1927 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al, 1932 if (drbd_bitmap_io(mdev, &drbd_bmio_set_susp_al,
1928 "set_n_write from invalidate_peer")) 1933 "set_n_write from invalidate_peer",
1934 BM_LOCKED_SET_ALLOWED))
1929 retcode = ERR_IO_MD_DISK; 1935 retcode = ERR_IO_MD_DISK;
1930 } 1936 }
1931 } else 1937 } else
@@ -2143,7 +2149,8 @@ static int drbd_nl_new_c_uuid(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nl
2143 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */ 2149 drbd_uuid_new_current(mdev); /* New current, previous to UI_BITMAP */
2144 2150
2145 if (args.clear_bm) { 2151 if (args.clear_bm) {
2146 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, "clear_n_write from new_c_uuid"); 2152 err = drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
2153 "clear_n_write from new_c_uuid", BM_LOCKED_MASK);
2147 if (err) { 2154 if (err) {
2148 dev_err(DEV, "Writing bitmap failed with %d\n",err); 2155 dev_err(DEV, "Writing bitmap failed with %d\n",err);
2149 retcode = ERR_IO_MD_DISK; 2156 retcode = ERR_IO_MD_DISK;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index e5686a81f42c..e13134f83fae 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -2599,7 +2599,8 @@ static enum drbd_conns drbd_sync_handshake(struct drbd_conf *mdev, enum drbd_rol
2599 2599
2600 if (abs(hg) >= 2) { 2600 if (abs(hg) >= 2) {
2601 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n"); 2601 dev_info(DEV, "Writing the whole bitmap, full sync required after drbd_sync_handshake.\n");
2602 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake")) 2602 if (drbd_bitmap_io(mdev, &drbd_bmio_set_n_write, "set_n_write from sync_handshake",
2603 BM_LOCKED_SET_ALLOWED))
2603 return C_MASK; 2604 return C_MASK;
2604 } 2605 }
2605 2606
@@ -3053,7 +3054,8 @@ static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned
3053 if (skip_initial_sync) { 3054 if (skip_initial_sync) {
3054 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n"); 3055 dev_info(DEV, "Accepted new current UUID, preparing to skip initial sync\n");
3055 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write, 3056 drbd_bitmap_io(mdev, &drbd_bmio_clear_n_write,
3056 "clear_n_write from receive_uuids"); 3057 "clear_n_write from receive_uuids",
3058 BM_LOCKED_TEST_ALLOWED);
3057 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]); 3059 _drbd_uuid_set(mdev, UI_CURRENT, p_uuid[UI_CURRENT]);
3058 _drbd_uuid_set(mdev, UI_BITMAP, 0); 3060 _drbd_uuid_set(mdev, UI_BITMAP, 0);
3059 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE), 3061 _drbd_set_state(_NS2(mdev, disk, D_UP_TO_DATE, pdsk, D_UP_TO_DATE),
@@ -3494,7 +3496,9 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3494 int ok = false; 3496 int ok = false;
3495 struct p_header80 *h = &mdev->data.rbuf.header.h80; 3497 struct p_header80 *h = &mdev->data.rbuf.header.h80;
3496 3498
3497 /* drbd_bm_lock(mdev, "receive bitmap"); By intention no bm_lock */ 3499 drbd_bm_lock(mdev, "receive bitmap", BM_LOCKED_SET_ALLOWED);
3500 /* you are supposed to send additional out-of-sync information
3501 * if you actually set bits during this phase */
3498 3502
3499 /* maybe we should use some per thread scratch page, 3503 /* maybe we should use some per thread scratch page,
3500 * and allocate that during initial device creation? */ 3504 * and allocate that during initial device creation? */
@@ -3568,7 +3572,7 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne
3568 3572
3569 ok = true; 3573 ok = true;
3570 out: 3574 out:
3571 /* drbd_bm_unlock(mdev); by intention no lock */ 3575 drbd_bm_unlock(mdev);
3572 if (ok && mdev->state.conn == C_WF_BITMAP_S) 3576 if (ok && mdev->state.conn == C_WF_BITMAP_S)
3573 drbd_start_resync(mdev, C_SYNC_SOURCE); 3577 drbd_start_resync(mdev, C_SYNC_SOURCE);
3574 free_page((unsigned long) buffer); 3578 free_page((unsigned long) buffer);
@@ -3817,7 +3821,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3817 3821
3818 fp = FP_DONT_CARE; 3822 fp = FP_DONT_CARE;
3819 if (get_ldev(mdev)) { 3823 if (get_ldev(mdev)) {
3820 drbd_bitmap_io(mdev, &drbd_bm_write, "write from disconnect");
3821 fp = mdev->ldev->dc.fencing; 3824 fp = mdev->ldev->dc.fencing;
3822 put_ldev(mdev); 3825 put_ldev(mdev);
3823 } 3826 }
@@ -3846,6 +3849,10 @@ static void drbd_disconnect(struct drbd_conf *mdev)
3846 drbd_request_state(mdev, NS(conn, C_STANDALONE)); 3849 drbd_request_state(mdev, NS(conn, C_STANDALONE));
3847 } 3850 }
3848 3851
3852 /* serialize with bitmap writeout triggered by the state change,
3853 * if any. */
3854 wait_event(mdev->misc_wait, !test_bit(BITMAP_IO, &mdev->flags));
3855
3849 /* tcp_close and release of sendpage pages can be deferred. I don't 3856 /* tcp_close and release of sendpage pages can be deferred. I don't
3850 * want to use SO_LINGER, because apparently it can be deferred for 3857 * want to use SO_LINGER, because apparently it can be deferred for
3851 * more than 20 seconds (longest time I checked). 3858 * more than 20 seconds (longest time I checked).