about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Lars Ellenberg <lars.ellenberg@linbit.com> 2010-12-14 09:13:04 -0500
committer Philipp Reisner <philipp.reisner@linbit.com> 2011-03-10 05:43:24 -0500
commit 4b0715f09655e76ca24c35a9e25e7c464c2f7346 (patch)
tree e98706a35b1e18cad09f01d2346d9a1c938c081c
parent 19f843aa08e2d8f87a09b4c2edc43b00638423a8 (diff)
drbd: allow petabyte storage on 64bit arch
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- drivers/block/drbd/drbd_bitmap.c 170
-rw-r--r-- drivers/block/drbd/drbd_int.h 41
-rw-r--r-- drivers/block/drbd/drbd_nl.c 8
-rw-r--r-- drivers/block/drbd/drbd_proc.c 6
-rw-r--r-- drivers/block/drbd/drbd_worker.c 2
5 files changed, 142 insertions, 85 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index 72cd41a96ef9..0e31e573af72 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -37,10 +37,46 @@
37 * convention: 37 * convention:
38 * function name drbd_bm_... => used elsewhere, "public". 38 * function name drbd_bm_... => used elsewhere, "public".
39 * function name bm_... => internal to implementation, "private". 39 * function name bm_... => internal to implementation, "private".
40 */
41
40 42
41 * Note that since find_first_bit returns int, at the current granularity of 43/*
42 * the bitmap (4KB per byte), this implementation "only" supports up to 44 * LIMITATIONS:
43 * 1<<(32+12) == 16 TB... 45 * We want to support >= peta byte of backend storage, while for now still using
46 * a granularity of one bit per 4KiB of storage.
47 * 1 << 50 bytes backend storage (1 PiB)
48 * 1 << (50 - 12) bits needed
49 * 38 --> we need u64 to index and count bits
50 * 1 << (38 - 3) bitmap bytes needed
51 * 35 --> we still need u64 to index and count bytes
52 * (that's 32 GiB of bitmap for 1 PiB storage)
53 * 1 << (35 - 2) 32bit longs needed
54 * 33 --> we'd even need u64 to index and count 32bit long words.
55 * 1 << (35 - 3) 64bit longs needed
56 * 32 --> we could get away with a 32bit unsigned int to index and count
57 * 64bit long words, but I rather stay with unsigned long for now.
58 * We probably should neither count nor point to bytes or long words
59 * directly, but either by bitnumber, or by page index and offset.
60 * 1 << (35 - 12)
61 * 23 --> we need that many 4KiB pages of bitmap.
62 * 1 << (23 + 3) --> on a 64bit arch,
63 * we need 64 MiB to store the array of page pointers.
64 *
65 * Because I'm lazy, and because the resulting patch was too large, too ugly
66 * and still incomplete, on 32bit we still "only" support 16 TiB (minus some),
67 * (1 << 32) bits * 4k storage.
68 *
69
70 * bitmap storage and IO:
71 * Bitmap is stored little endian on disk, and is kept little endian in
72 * core memory. Currently we still hold the full bitmap in core as long
73 * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage
74 * seems excessive.
75 *
76 * We plan to reduce the number of in-core bitmap pages by paging them in
77 * and out against their on-disk location as necessary, but need to make
78 * sure we don't cause too much meta data IO, and must not deadlock in
79 * tight memory situations. This needs some more work.
44 */ 80 */
45 81
46/* 82/*
@@ -56,13 +92,9 @@
56struct drbd_bitmap { 92struct drbd_bitmap {
57 struct page **bm_pages; 93 struct page **bm_pages;
58 spinlock_t bm_lock; 94 spinlock_t bm_lock;
59 /* WARNING unsigned long bm_*: 95
60 * 32bit number of bit offset is just enough for 512 MB bitmap. 96 /* see LIMITATIONS: above */
61 * it will blow up if we make the bitmap bigger... 97
62 * not that it makes much sense to have a bitmap that large,
63 * rather change the granularity to 16k or 64k or something.
64 * (that implies other problems, however...)
65 */
66 unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ 98 unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */
67 unsigned long bm_bits; 99 unsigned long bm_bits;
68 size_t bm_words; 100 size_t bm_words;
@@ -517,43 +549,39 @@ static void bm_set_surplus(struct drbd_bitmap *b)
517 bm_unmap(p_addr); 549 bm_unmap(p_addr);
518} 550}
519 551
552/* you better not modify the bitmap while this is running,
553 * or its results will be stale */
520static unsigned long bm_count_bits(struct drbd_bitmap *b) 554static unsigned long bm_count_bits(struct drbd_bitmap *b)
521{ 555{
522 unsigned long *p_addr, *bm, offset = 0; 556 unsigned long *p_addr;
523 unsigned long bits = 0; 557 unsigned long bits = 0;
524 unsigned long i, do_now; 558 unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1;
525 unsigned long words; 559 int idx, last_page, i, last_word;
526 560
527 /* due to 64bit alignment, the last long on a 32bit arch 561 /* because of the "extra long to catch oob access" we allocate in
528 * may be not used at all. The last used long will likely 562 * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page
529 * be only partially used, always. Don't count those bits, 563 * containing the last _relevant_ bitmap word */
530 * but mask them out. */ 564 last_page = bm_bit_to_page_idx(b, b->bm_bits-1);
531 words = (b->bm_bits + BITS_PER_LONG - 1) >> LN2_BPL; 565
532 566 /* all but last page */
533 while (offset < words) { 567 for (idx = 0; idx < last_page; idx++) {
534 i = do_now = min_t(size_t, words-offset, LWPP); 568 p_addr = __bm_map_pidx(b, idx, KM_USER0);
535 p_addr = __bm_map_pidx(b, bm_word_to_page_idx(b, offset), KM_USER0); 569 for (i = 0; i < LWPP; i++)
536 bm = p_addr + MLPP(offset); 570 bits += hweight_long(p_addr[i]);
537 while (i--) {
538 bits += hweight_long(*bm++);
539 }
540 offset += do_now;
541 if (offset == words) {
542 /* last word may only be partially used,
543 * see also bm_clear_surplus. */
544 i = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1;
545 if (i) {
546 bits -= hweight_long(p_addr[do_now-1] & ~i);
547 p_addr[do_now-1] &= i;
548 }
549 /* 32bit arch, may have an unused padding long */
550 if (words != b->bm_words)
551 p_addr[do_now] = 0;
552 }
553 __bm_unmap(p_addr, KM_USER0); 571 __bm_unmap(p_addr, KM_USER0);
554 cond_resched(); 572 cond_resched();
555 } 573 }
556 574 /* last (or only) page */
575 last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL;
576 p_addr = __bm_map_pidx(b, idx, KM_USER0);
577 for (i = 0; i < last_word; i++)
578 bits += hweight_long(p_addr[i]);
579 p_addr[last_word] &= cpu_to_lel(mask);
580 bits += hweight_long(p_addr[last_word]);
581 /* 32bit arch, may have an unused padding long */
582 if (BITS_PER_LONG == 32 && (last_word & 1) == 0)
583 p_addr[last_word+1] = 0;
584 __bm_unmap(p_addr, KM_USER0);
557 return bits; 585 return bits;
558} 586}
559 587
@@ -564,8 +592,6 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len)
564 unsigned int idx; 592 unsigned int idx;
565 size_t do_now, end; 593 size_t do_now, end;
566 594
567#define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512)
568
569 end = offset + len; 595 end = offset + len;
570 596
571 if (end > b->bm_words) { 597 if (end > b->bm_words) {
@@ -645,8 +671,14 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
645 words = ALIGN(bits, 64) >> LN2_BPL; 671 words = ALIGN(bits, 64) >> LN2_BPL;
646 672
647 if (get_ldev(mdev)) { 673 if (get_ldev(mdev)) {
648 D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); 674 u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12;
649 put_ldev(mdev); 675 put_ldev(mdev);
676 if (bits > bits_on_disk) {
677 dev_info(DEV, "bits = %lu\n", bits);
678 dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk);
679 err = -ENOSPC;
680 goto out;
681 }
650 } 682 }
651 683
652 /* one extra long to catch off by one errors */ 684 /* one extra long to catch off by one errors */
@@ -1113,9 +1145,12 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l
1113 * @mdev: DRBD device. 1145 * @mdev: DRBD device.
1114 * @idx: bitmap page index 1146 * @idx: bitmap page index
1115 * 1147 *
1116 * We don't want to special case on logical_block_size of the underlaying 1148 * We don't want to special case on logical_block_size of the backend device,
1117 * device, so we submit PAGE_SIZE aligned pieces containing the requested enr. 1149 * so we submit PAGE_SIZE aligned pieces.
1118 * Note that on "most" systems, PAGE_SIZE is 4k. 1150 * Note that on "most" systems, PAGE_SIZE is 4k.
1151 *
1152 * In case this becomes an issue on systems with larger PAGE_SIZE,
1153 * we may want to change this again to write 4k aligned 4k pieces.
1119 */ 1154 */
1120int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) 1155int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local)
1121{ 1156{
@@ -1144,52 +1179,57 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
1144 1179
1145/* NOTE 1180/* NOTE
1146 * find_first_bit returns int, we return unsigned long. 1181 * find_first_bit returns int, we return unsigned long.
1147 * should not make much difference anyways, but ... 1182 * For this to work on 32bit arch with bitnumbers > (1<<32),
1183 * we'd need to return u64, and get a whole lot of other places
1184 * fixed where we still use unsigned long.
1148 * 1185 *
1149 * this returns a bit number, NOT a sector! 1186 * this returns a bit number, NOT a sector!
1150 */ 1187 */
1151#define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1)
1152static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, 1188static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo,
1153 const int find_zero_bit, const enum km_type km) 1189 const int find_zero_bit, const enum km_type km)
1154{ 1190{
1155 struct drbd_bitmap *b = mdev->bitmap; 1191 struct drbd_bitmap *b = mdev->bitmap;
1156 unsigned long i = -1UL;
1157 unsigned long *p_addr; 1192 unsigned long *p_addr;
1158 unsigned long bit_offset; /* bit offset of the mapped page. */ 1193 unsigned long bit_offset;
1194 unsigned i;
1195
1159 1196
1160 if (bm_fo > b->bm_bits) { 1197 if (bm_fo > b->bm_bits) {
1161 dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); 1198 dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits);
1199 bm_fo = DRBD_END_OF_BITMAP;
1162 } else { 1200 } else {
1163 while (bm_fo < b->bm_bits) { 1201 while (bm_fo < b->bm_bits) {
1164 /* bit offset of the first bit in the page */ 1202 /* bit offset of the first bit in the page */
1165 bit_offset = bm_fo & ~BPP_MASK; 1203 bit_offset = bm_fo & ~BITS_PER_PAGE_MASK;
1166 p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); 1204 p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km);
1167 1205
1168 if (find_zero_bit) 1206 if (find_zero_bit)
1169 i = generic_find_next_zero_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 1207 i = generic_find_next_zero_le_bit(p_addr,
1208 PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
1170 else 1209 else
1171 i = generic_find_next_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); 1210 i = generic_find_next_le_bit(p_addr,
1211 PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK);
1172 1212
1173 __bm_unmap(p_addr, km); 1213 __bm_unmap(p_addr, km);
1174 if (i < PAGE_SIZE*8) { 1214 if (i < PAGE_SIZE*8) {
1175 i = bit_offset + i; 1215 bm_fo = bit_offset + i;
1176 if (i >= b->bm_bits) 1216 if (bm_fo >= b->bm_bits)
1177 break; 1217 break;
1178 goto found; 1218 goto found;
1179 } 1219 }
1180 bm_fo = bit_offset + PAGE_SIZE*8; 1220 bm_fo = bit_offset + PAGE_SIZE*8;
1181 } 1221 }
1182 i = -1UL; 1222 bm_fo = DRBD_END_OF_BITMAP;
1183 } 1223 }
1184 found: 1224 found:
1185 return i; 1225 return bm_fo;
1186} 1226}
1187 1227
1188static unsigned long bm_find_next(struct drbd_conf *mdev, 1228static unsigned long bm_find_next(struct drbd_conf *mdev,
1189 unsigned long bm_fo, const int find_zero_bit) 1229 unsigned long bm_fo, const int find_zero_bit)
1190{ 1230{
1191 struct drbd_bitmap *b = mdev->bitmap; 1231 struct drbd_bitmap *b = mdev->bitmap;
1192 unsigned long i = -1UL; 1232 unsigned long i = DRBD_END_OF_BITMAP;
1193 1233
1194 ERR_IF(!b) return i; 1234 ERR_IF(!b) return i;
1195 ERR_IF(!b->bm_pages) return i; 1235 ERR_IF(!b->bm_pages) return i;
@@ -1267,9 +1307,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
1267 last_page_nr = page_nr; 1307 last_page_nr = page_nr;
1268 } 1308 }
1269 if (val) 1309 if (val)
1270 c += (0 == generic___test_and_set_le_bit(bitnr & BPP_MASK, p_addr)); 1310 c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr));
1271 else 1311 else
1272 c -= (0 != generic___test_and_clear_le_bit(bitnr & BPP_MASK, p_addr)); 1312 c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr));
1273 } 1313 }
1274 if (p_addr) 1314 if (p_addr)
1275 __bm_unmap(p_addr, km); 1315 __bm_unmap(p_addr, km);
@@ -1418,7 +1458,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
1418 bm_print_lock_info(mdev); 1458 bm_print_lock_info(mdev);
1419 if (bitnr < b->bm_bits) { 1459 if (bitnr < b->bm_bits) {
1420 p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); 1460 p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr));
1421 i = generic_test_le_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; 1461 i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0;
1422 bm_unmap(p_addr); 1462 bm_unmap(p_addr);
1423 } else if (bitnr == b->bm_bits) { 1463 } else if (bitnr == b->bm_bits) {
1424 i = -1; 1464 i = -1;
@@ -1517,13 +1557,15 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
1517 return count; 1557 return count;
1518} 1558}
1519 1559
1520/* set all bits covered by the AL-extent al_enr */ 1560/* Set all bits covered by the AL-extent al_enr.
1561 * Returns number of bits changed. */
1521unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) 1562unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
1522{ 1563{
1523 struct drbd_bitmap *b = mdev->bitmap; 1564 struct drbd_bitmap *b = mdev->bitmap;
1524 unsigned long *p_addr, *bm; 1565 unsigned long *p_addr, *bm;
1525 unsigned long weight; 1566 unsigned long weight;
1526 int count, s, e, i, do_now; 1567 unsigned long s, e;
1568 int count, i, do_now;
1527 ERR_IF(!b) return 0; 1569 ERR_IF(!b) return 0;
1528 ERR_IF(!b->bm_pages) return 0; 1570 ERR_IF(!b->bm_pages) return 0;
1529 1571
@@ -1552,7 +1594,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
1552 if (e == b->bm_words) 1594 if (e == b->bm_words)
1553 b->bm_set -= bm_clear_surplus(b); 1595 b->bm_set -= bm_clear_surplus(b);
1554 } else { 1596 } else {
1555 dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); 1597 dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
1556 } 1598 }
1557 weight = b->bm_set - weight; 1599 weight = b->bm_set - weight;
1558 spin_unlock_irq(&b->bm_lock); 1600 spin_unlock_irq(&b->bm_lock);
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 74cc50a21822..5a2d0ec72b34 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1003,9 +1003,9 @@ struct drbd_conf {
1003 struct hlist_head *tl_hash; 1003 struct hlist_head *tl_hash;
1004 unsigned int tl_hash_s; 1004 unsigned int tl_hash_s;
1005 1005
1006 /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ 1006 /* blocks to resync in this run [unit BM_BLOCK_SIZE] */
1007 unsigned long rs_total; 1007 unsigned long rs_total;
1008 /* number of sync IOs that failed in this run */ 1008 /* number of resync blocks that failed in this run */
1009 unsigned long rs_failed; 1009 unsigned long rs_failed;
1010 /* Syncer's start time [unit jiffies] */ 1010 /* Syncer's start time [unit jiffies] */
1011 unsigned long rs_start; 1011 unsigned long rs_start;
@@ -1399,7 +1399,9 @@ struct bm_extent {
1399 * you should use 64bit OS for that much storage, anyways. */ 1399 * you should use 64bit OS for that much storage, anyways. */
1400#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) 1400#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff)
1401#else 1401#else
1402#define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) 1402/* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */
1403#define DRBD_MAX_SECTORS_FLEX (1UL << 51)
1404/* corresponds to (1UL << 38) bits right now. */
1403#endif 1405#endif
1404#endif 1406#endif
1405 1407
@@ -1419,11 +1421,15 @@ extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new
1419extern void drbd_bm_cleanup(struct drbd_conf *mdev); 1421extern void drbd_bm_cleanup(struct drbd_conf *mdev);
1420extern void drbd_bm_set_all(struct drbd_conf *mdev); 1422extern void drbd_bm_set_all(struct drbd_conf *mdev);
1421extern void drbd_bm_clear_all(struct drbd_conf *mdev); 1423extern void drbd_bm_clear_all(struct drbd_conf *mdev);
1424/* set/clear/test only a few bits at a time */
1422extern int drbd_bm_set_bits( 1425extern int drbd_bm_set_bits(
1423 struct drbd_conf *mdev, unsigned long s, unsigned long e); 1426 struct drbd_conf *mdev, unsigned long s, unsigned long e);
1424extern int drbd_bm_clear_bits( 1427extern int drbd_bm_clear_bits(
1425 struct drbd_conf *mdev, unsigned long s, unsigned long e); 1428 struct drbd_conf *mdev, unsigned long s, unsigned long e);
1426/* bm_set_bits variant for use while holding drbd_bm_lock */ 1429extern int drbd_bm_count_bits(
1430 struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
1431/* bm_set_bits variant for use while holding drbd_bm_lock,
1432 * may process the whole bitmap in one go */
1427extern void _drbd_bm_set_bits(struct drbd_conf *mdev, 1433extern void _drbd_bm_set_bits(struct drbd_conf *mdev,
1428 const unsigned long s, const unsigned long e); 1434 const unsigned long s, const unsigned long e);
1429extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); 1435extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr);
@@ -1436,6 +1442,8 @@ extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev,
1436extern size_t drbd_bm_words(struct drbd_conf *mdev); 1442extern size_t drbd_bm_words(struct drbd_conf *mdev);
1437extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); 1443extern unsigned long drbd_bm_bits(struct drbd_conf *mdev);
1438extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); 1444extern sector_t drbd_bm_capacity(struct drbd_conf *mdev);
1445
1446#define DRBD_END_OF_BITMAP (~(unsigned long)0)
1439extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); 1447extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
1440/* bm_find_next variants for use while you hold drbd_bm_lock() */ 1448/* bm_find_next variants for use while you hold drbd_bm_lock() */
1441extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); 1449extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo);
@@ -1452,8 +1460,6 @@ extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset,
1452 1460
1453extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); 1461extern void drbd_bm_lock(struct drbd_conf *mdev, char *why);
1454extern void drbd_bm_unlock(struct drbd_conf *mdev); 1462extern void drbd_bm_unlock(struct drbd_conf *mdev);
1455
1456extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e);
1457/* drbd_main.c */ 1463/* drbd_main.c */
1458 1464
1459extern struct kmem_cache *drbd_request_cache; 1465extern struct kmem_cache *drbd_request_cache;
@@ -2158,10 +2164,8 @@ extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins)
2158static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, 2164static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
2159 unsigned long *bits_left, unsigned int *per_mil_done) 2165 unsigned long *bits_left, unsigned int *per_mil_done)
2160{ 2166{
2161 /* 2167 /* this is to break it at compile time when we change that, in case we
2162 * this is to break it at compile time when we change that 2168 * want to support more than (1<<32) bits on a 32bit arch. */
2163 * (we may feel 4TB maximum storage per drbd is not enough)
2164 */
2165 typecheck(unsigned long, mdev->rs_total); 2169 typecheck(unsigned long, mdev->rs_total);
2166 2170
2167 /* note: both rs_total and rs_left are in bits, i.e. in 2171 /* note: both rs_total and rs_left are in bits, i.e. in
@@ -2186,10 +2190,19 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev,
2186 *bits_left, mdev->rs_total, mdev->rs_failed); 2190 *bits_left, mdev->rs_total, mdev->rs_failed);
2187 *per_mil_done = 0; 2191 *per_mil_done = 0;
2188 } else { 2192 } else {
2189 /* make sure the calculation happens in long context */ 2193 /* Make sure the division happens in long context.
2190 unsigned long tmp = 1000UL - 2194 * We allow up to one petabyte storage right now,
2191 (*bits_left >> 10)*1000UL 2195 * at a granularity of 4k per bit that is 2**38 bits.
2192 / ((mdev->rs_total >> 10) + 1UL); 2196 * After shift right and multiplication by 1000,
2197 * this should still fit easily into a 32bit long,
2198 * so we don't need a 64bit division on 32bit arch.
2199 * Note: currently we don't support such large bitmaps on 32bit
2200 * arch anyways, but no harm done to be prepared for it here.
2201 */
2202 unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10;
2203 unsigned long left = *bits_left >> shift;
2204 unsigned long total = 1UL + (mdev->rs_total >> shift);
2205 unsigned long tmp = 1000UL - left * 1000UL/total;
2193 *per_mil_done = tmp; 2206 *per_mil_done = tmp;
2194 } 2207 }
2195} 2208}
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c
index 77dc022eaf6b..a46bc0287e21 100644
--- a/drivers/block/drbd/drbd_nl.c
+++ b/drivers/block/drbd/drbd_nl.c
@@ -527,17 +527,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev,
527 } 527 }
528} 528}
529 529
530/* input size is expected to be in KB */
530char *ppsize(char *buf, unsigned long long size) 531char *ppsize(char *buf, unsigned long long size)
531{ 532{
532 /* Needs 9 bytes at max. */ 533 /* Needs 9 bytes at max including trailing NUL:
534 * -1ULL ==> "16384 EB" */
533 static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; 535 static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' };
534 int base = 0; 536 int base = 0;
535 while (size >= 10000) { 537 while (size >= 10000 && base < sizeof(units)-1) {
536 /* shift + round */ 538 /* shift + round */
537 size = (size >> 10) + !!(size & (1<<9)); 539 size = (size >> 10) + !!(size & (1<<9));
538 base++; 540 base++;
539 } 541 }
540 sprintf(buf, "%lu %cB", (long)size, units[base]); 542 sprintf(buf, "%u %cB", (unsigned)size, units[base]);
541 543
542 return buf; 544 return buf;
543} 545}
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c
index efba62cd2e58..2959cdfb77f5 100644
--- a/drivers/block/drbd/drbd_proc.c
+++ b/drivers/block/drbd/drbd_proc.c
@@ -91,9 +91,9 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq)
91 seq_printf(seq, "sync'ed:"); 91 seq_printf(seq, "sync'ed:");
92 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); 92 seq_printf(seq, "%3u.%u%% ", res / 10, res % 10);
93 93
94 /* if more than 1 GB display in MB */ 94 /* if more than a few GB, display in MB */
95 if (mdev->rs_total > 0x100000L) 95 if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT)))
96 seq_printf(seq, "(%lu/%lu)M\n\t", 96 seq_printf(seq, "(%lu/%lu)M",
97 (unsigned long) Bit2KB(rs_left >> 10), 97 (unsigned long) Bit2KB(rs_left >> 10),
98 (unsigned long) Bit2KB(mdev->rs_total >> 10)); 98 (unsigned long) Bit2KB(mdev->rs_total >> 10));
99 else 99 else
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index d17f2ed777ce..be46084c254e 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -577,7 +577,7 @@ next_sector:
577 size = BM_BLOCK_SIZE; 577 size = BM_BLOCK_SIZE;
578 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); 578 bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo);
579 579
580 if (bit == -1UL) { 580 if (bit == DRBD_END_OF_BITMAP) {
581 mdev->bm_resync_fo = drbd_bm_bits(mdev); 581 mdev->bm_resync_fo = drbd_bm_bits(mdev);
582 mdev->resync_work.cb = w_resync_inactive; 582 mdev->resync_work.cb = w_resync_inactive;
583 put_ldev(mdev); 583 put_ldev(mdev);