diff options
author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2010-12-14 09:13:04 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-03-10 05:43:24 -0500 |
commit | 4b0715f09655e76ca24c35a9e25e7c464c2f7346 (patch) | |
tree | e98706a35b1e18cad09f01d2346d9a1c938c081c /drivers/block/drbd | |
parent | 19f843aa08e2d8f87a09b4c2edc43b00638423a8 (diff) |
drbd: allow petabyte storage on 64bit arch
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Diffstat (limited to 'drivers/block/drbd')
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 170 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 41 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_nl.c | 8 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_proc.c | 6 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_worker.c | 2 |
5 files changed, 142 insertions, 85 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c index 72cd41a96ef9..0e31e573af72 100644 --- a/drivers/block/drbd/drbd_bitmap.c +++ b/drivers/block/drbd/drbd_bitmap.c | |||
@@ -37,10 +37,46 @@ | |||
37 | * convention: | 37 | * convention: |
38 | * function name drbd_bm_... => used elsewhere, "public". | 38 | * function name drbd_bm_... => used elsewhere, "public". |
39 | * function name bm_... => internal to implementation, "private". | 39 | * function name bm_... => internal to implementation, "private". |
40 | */ | ||
41 | |||
40 | 42 | ||
41 | * Note that since find_first_bit returns int, at the current granularity of | 43 | /* |
42 | * the bitmap (4KB per byte), this implementation "only" supports up to | 44 | * LIMITATIONS: |
43 | * 1<<(32+12) == 16 TB... | 45 | * We want to support >= petabyte of backend storage, while for now still using |
46 | * a granularity of one bit per 4KiB of storage. | ||
47 | * 1 << 50 bytes backend storage (1 PiB) | ||
48 | * 1 << (50 - 12) bits needed | ||
49 | * 38 --> we need u64 to index and count bits | ||
50 | * 1 << (38 - 3) bitmap bytes needed | ||
51 | * 35 --> we still need u64 to index and count bytes | ||
52 | * (that's 32 GiB of bitmap for 1 PiB storage) | ||
53 | * 1 << (35 - 2) 32bit longs needed | ||
54 | * 33 --> we'd even need u64 to index and count 32bit long words. | ||
55 | * 1 << (35 - 3) 64bit longs needed | ||
56 | * 32 --> we could get away with a 32bit unsigned int to index and count | ||
57 | * 64bit long words, but I rather stay with unsigned long for now. | ||
58 | * We probably should neither count nor point to bytes or long words | ||
59 | * directly, but either by bitnumber, or by page index and offset. | ||
60 | * 1 << (35 - 12) | |
61 | * 23 --> we need that many 4KiB pages of bitmap. | |
62 | * 1 << (23 + 3) --> on a 64bit arch, | |
63 | * we need 64 MiB to store the array of page pointers. | |
64 | * | ||
65 | * Because I'm lazy, and because the resulting patch was too large, too ugly | ||
66 | * and still incomplete, on 32bit we still "only" support 16 TiB (minus some), | ||
67 | * (1 << 32) bits * 4k storage. | ||
68 | * | ||
69 | |||
70 | * bitmap storage and IO: | ||
71 | * Bitmap is stored little endian on disk, and is kept little endian in | ||
72 | * core memory. Currently we still hold the full bitmap in core as long | ||
73 | * as we are "attached" to a local disk, which at 32 GiB for 1PiB storage | ||
74 | * seems excessive. | ||
75 | * | ||
76 | * We plan to reduce the amount of in-core bitmap pages by paging them in | |
77 | * and out against their on-disk location as necessary, but need to make | ||
78 | * sure we don't cause too much meta data IO, and must not deadlock in | ||
79 | * tight memory situations. This needs some more work. | ||
44 | */ | 80 | */ |
45 | 81 | ||
46 | /* | 82 | /* |
@@ -56,13 +92,9 @@ | |||
56 | struct drbd_bitmap { | 92 | struct drbd_bitmap { |
57 | struct page **bm_pages; | 93 | struct page **bm_pages; |
58 | spinlock_t bm_lock; | 94 | spinlock_t bm_lock; |
59 | /* WARNING unsigned long bm_*: | 95 | |
60 | * 32bit number of bit offset is just enough for 512 MB bitmap. | 96 | /* see LIMITATIONS: above */ |
61 | * it will blow up if we make the bitmap bigger... | 97 | |
62 | * not that it makes much sense to have a bitmap that large, | ||
63 | * rather change the granularity to 16k or 64k or something. | ||
64 | * (that implies other problems, however...) | ||
65 | */ | ||
66 | unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ | 98 | unsigned long bm_set; /* nr of set bits; THINK maybe atomic_t? */ |
67 | unsigned long bm_bits; | 99 | unsigned long bm_bits; |
68 | size_t bm_words; | 100 | size_t bm_words; |
@@ -517,43 +549,39 @@ static void bm_set_surplus(struct drbd_bitmap *b) | |||
517 | bm_unmap(p_addr); | 549 | bm_unmap(p_addr); |
518 | } | 550 | } |
519 | 551 | ||
552 | /* you better not modify the bitmap while this is running, | ||
553 | * or its results will be stale */ | ||
520 | static unsigned long bm_count_bits(struct drbd_bitmap *b) | 554 | static unsigned long bm_count_bits(struct drbd_bitmap *b) |
521 | { | 555 | { |
522 | unsigned long *p_addr, *bm, offset = 0; | 556 | unsigned long *p_addr; |
523 | unsigned long bits = 0; | 557 | unsigned long bits = 0; |
524 | unsigned long i, do_now; | 558 | unsigned long mask = (1UL << (b->bm_bits & BITS_PER_LONG_MASK)) -1; |
525 | unsigned long words; | 559 | int idx, last_page, i, last_word; |
526 | 560 | ||
527 | /* due to 64bit alignment, the last long on a 32bit arch | 561 | /* because of the "extra long to catch oob access" we allocate in |
528 | * may be not used at all. The last used long will likely | 562 | * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page |
529 | * be only partially used, always. Don't count those bits, | 563 | * containing the last _relevant_ bitmap word */ |
530 | * but mask them out. */ | 564 | last_page = bm_bit_to_page_idx(b, b->bm_bits-1); |
531 | words = (b->bm_bits + BITS_PER_LONG - 1) >> LN2_BPL; | 565 | |
532 | 566 | /* all but last page */ | |
533 | while (offset < words) { | 567 | for (idx = 0; idx < last_page; idx++) { |
534 | i = do_now = min_t(size_t, words-offset, LWPP); | 568 | p_addr = __bm_map_pidx(b, idx, KM_USER0); |
535 | p_addr = __bm_map_pidx(b, bm_word_to_page_idx(b, offset), KM_USER0); | 569 | for (i = 0; i < LWPP; i++) |
536 | bm = p_addr + MLPP(offset); | 570 | bits += hweight_long(p_addr[i]); |
537 | while (i--) { | ||
538 | bits += hweight_long(*bm++); | ||
539 | } | ||
540 | offset += do_now; | ||
541 | if (offset == words) { | ||
542 | /* last word may only be partially used, | ||
543 | * see also bm_clear_surplus. */ | ||
544 | i = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) -1; | ||
545 | if (i) { | ||
546 | bits -= hweight_long(p_addr[do_now-1] & ~i); | ||
547 | p_addr[do_now-1] &= i; | ||
548 | } | ||
549 | /* 32bit arch, may have an unused padding long */ | ||
550 | if (words != b->bm_words) | ||
551 | p_addr[do_now] = 0; | ||
552 | } | ||
553 | __bm_unmap(p_addr, KM_USER0); | 571 | __bm_unmap(p_addr, KM_USER0); |
554 | cond_resched(); | 572 | cond_resched(); |
555 | } | 573 | } |
556 | 574 | /* last (or only) page */ | |
575 | last_word = ((b->bm_bits - 1) & BITS_PER_PAGE_MASK) >> LN2_BPL; | ||
576 | p_addr = __bm_map_pidx(b, idx, KM_USER0); | ||
577 | for (i = 0; i < last_word; i++) | ||
578 | bits += hweight_long(p_addr[i]); | ||
579 | p_addr[last_word] &= cpu_to_lel(mask); | ||
580 | bits += hweight_long(p_addr[last_word]); | ||
581 | /* 32bit arch, may have an unused padding long */ | ||
582 | if (BITS_PER_LONG == 32 && (last_word & 1) == 0) | ||
583 | p_addr[last_word+1] = 0; | ||
584 | __bm_unmap(p_addr, KM_USER0); | ||
557 | return bits; | 585 | return bits; |
558 | } | 586 | } |
559 | 587 | ||
@@ -564,8 +592,6 @@ static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | |||
564 | unsigned int idx; | 592 | unsigned int idx; |
565 | size_t do_now, end; | 593 | size_t do_now, end; |
566 | 594 | ||
567 | #define BM_SECTORS_PER_BIT (BM_BLOCK_SIZE/512) | ||
568 | |||
569 | end = offset + len; | 595 | end = offset + len; |
570 | 596 | ||
571 | if (end > b->bm_words) { | 597 | if (end > b->bm_words) { |
@@ -645,8 +671,14 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits) | |||
645 | words = ALIGN(bits, 64) >> LN2_BPL; | 671 | words = ALIGN(bits, 64) >> LN2_BPL; |
646 | 672 | ||
647 | if (get_ldev(mdev)) { | 673 | if (get_ldev(mdev)) { |
648 | D_ASSERT((u64)bits <= (((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12)); | 674 | u64 bits_on_disk = ((u64)mdev->ldev->md.md_size_sect-MD_BM_OFFSET) << 12; |
649 | put_ldev(mdev); | 675 | put_ldev(mdev); |
676 | if (bits > bits_on_disk) { | ||
677 | dev_info(DEV, "bits = %lu\n", bits); | ||
678 | dev_info(DEV, "bits_on_disk = %llu\n", bits_on_disk); | ||
679 | err = -ENOSPC; | ||
680 | goto out; | ||
681 | } | ||
650 | } | 682 | } |
651 | 683 | ||
652 | /* one extra long to catch off by one errors */ | 684 | /* one extra long to catch off by one errors */ |
@@ -1113,9 +1145,12 @@ int drbd_bm_write_lazy(struct drbd_conf *mdev, unsigned upper_idx) __must_hold(l | |||
1113 | * @mdev: DRBD device. | 1145 | * @mdev: DRBD device. |
1114 | * @idx: bitmap page index | 1146 | * @idx: bitmap page index |
1115 | * | 1147 | * |
1116 | * We don't want to special case on logical_block_size of the underlaying | 1148 | * We don't want to special case on logical_block_size of the backend device, |
1117 | * device, so we submit PAGE_SIZE aligned pieces containing the requested enr. | 1149 | * so we submit PAGE_SIZE aligned pieces. |
1118 | * Note that on "most" systems, PAGE_SIZE is 4k. | 1150 | * Note that on "most" systems, PAGE_SIZE is 4k. |
1151 | * | ||
1152 | * In case this becomes an issue on systems with larger PAGE_SIZE, | ||
1153 | * we may want to change this again to write 4k aligned 4k pieces. | ||
1119 | */ | 1154 | */ |
1120 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) | 1155 | int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(local) |
1121 | { | 1156 | { |
@@ -1144,52 +1179,57 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc | |||
1144 | 1179 | ||
1145 | /* NOTE | 1180 | /* NOTE |
1146 | * find_first_bit returns int, we return unsigned long. | 1181 | * find_first_bit returns int, we return unsigned long. |
1147 | * should not make much difference anyways, but ... | 1182 | * For this to work on 32bit arch with bitnumbers > (1<<32), |
1183 | * we'd need to return u64, and get a whole lot of other places | ||
1184 | * fixed where we still use unsigned long. | ||
1148 | * | 1185 | * |
1149 | * this returns a bit number, NOT a sector! | 1186 | * this returns a bit number, NOT a sector! |
1150 | */ | 1187 | */ |
1151 | #define BPP_MASK ((1UL << (PAGE_SHIFT+3)) - 1) | ||
1152 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, | 1188 | static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, |
1153 | const int find_zero_bit, const enum km_type km) | 1189 | const int find_zero_bit, const enum km_type km) |
1154 | { | 1190 | { |
1155 | struct drbd_bitmap *b = mdev->bitmap; | 1191 | struct drbd_bitmap *b = mdev->bitmap; |
1156 | unsigned long i = -1UL; | ||
1157 | unsigned long *p_addr; | 1192 | unsigned long *p_addr; |
1158 | unsigned long bit_offset; /* bit offset of the mapped page. */ | 1193 | unsigned long bit_offset; |
1194 | unsigned i; | ||
1195 | |||
1159 | 1196 | ||
1160 | if (bm_fo > b->bm_bits) { | 1197 | if (bm_fo > b->bm_bits) { |
1161 | dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); | 1198 | dev_err(DEV, "bm_fo=%lu bm_bits=%lu\n", bm_fo, b->bm_bits); |
1199 | bm_fo = DRBD_END_OF_BITMAP; | ||
1162 | } else { | 1200 | } else { |
1163 | while (bm_fo < b->bm_bits) { | 1201 | while (bm_fo < b->bm_bits) { |
1164 | /* bit offset of the first bit in the page */ | 1202 | /* bit offset of the first bit in the page */ |
1165 | bit_offset = bm_fo & ~BPP_MASK; | 1203 | bit_offset = bm_fo & ~BITS_PER_PAGE_MASK; |
1166 | p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); | 1204 | p_addr = __bm_map_pidx(b, bm_bit_to_page_idx(b, bm_fo), km); |
1167 | 1205 | ||
1168 | if (find_zero_bit) | 1206 | if (find_zero_bit) |
1169 | i = generic_find_next_zero_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 1207 | i = generic_find_next_zero_le_bit(p_addr, |
1208 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | ||
1170 | else | 1209 | else |
1171 | i = generic_find_next_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 1210 | i = generic_find_next_le_bit(p_addr, |
1211 | PAGE_SIZE*8, bm_fo & BITS_PER_PAGE_MASK); | ||
1172 | 1212 | ||
1173 | __bm_unmap(p_addr, km); | 1213 | __bm_unmap(p_addr, km); |
1174 | if (i < PAGE_SIZE*8) { | 1214 | if (i < PAGE_SIZE*8) { |
1175 | i = bit_offset + i; | 1215 | bm_fo = bit_offset + i; |
1176 | if (i >= b->bm_bits) | 1216 | if (bm_fo >= b->bm_bits) |
1177 | break; | 1217 | break; |
1178 | goto found; | 1218 | goto found; |
1179 | } | 1219 | } |
1180 | bm_fo = bit_offset + PAGE_SIZE*8; | 1220 | bm_fo = bit_offset + PAGE_SIZE*8; |
1181 | } | 1221 | } |
1182 | i = -1UL; | 1222 | bm_fo = DRBD_END_OF_BITMAP; |
1183 | } | 1223 | } |
1184 | found: | 1224 | found: |
1185 | return i; | 1225 | return bm_fo; |
1186 | } | 1226 | } |
1187 | 1227 | ||
1188 | static unsigned long bm_find_next(struct drbd_conf *mdev, | 1228 | static unsigned long bm_find_next(struct drbd_conf *mdev, |
1189 | unsigned long bm_fo, const int find_zero_bit) | 1229 | unsigned long bm_fo, const int find_zero_bit) |
1190 | { | 1230 | { |
1191 | struct drbd_bitmap *b = mdev->bitmap; | 1231 | struct drbd_bitmap *b = mdev->bitmap; |
1192 | unsigned long i = -1UL; | 1232 | unsigned long i = DRBD_END_OF_BITMAP; |
1193 | 1233 | ||
1194 | ERR_IF(!b) return i; | 1234 | ERR_IF(!b) return i; |
1195 | ERR_IF(!b->bm_pages) return i; | 1235 | ERR_IF(!b->bm_pages) return i; |
@@ -1267,9 +1307,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
1267 | last_page_nr = page_nr; | 1307 | last_page_nr = page_nr; |
1268 | } | 1308 | } |
1269 | if (val) | 1309 | if (val) |
1270 | c += (0 == generic___test_and_set_le_bit(bitnr & BPP_MASK, p_addr)); | 1310 | c += (0 == generic___test_and_set_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
1271 | else | 1311 | else |
1272 | c -= (0 != generic___test_and_clear_le_bit(bitnr & BPP_MASK, p_addr)); | 1312 | c -= (0 != generic___test_and_clear_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr)); |
1273 | } | 1313 | } |
1274 | if (p_addr) | 1314 | if (p_addr) |
1275 | __bm_unmap(p_addr, km); | 1315 | __bm_unmap(p_addr, km); |
@@ -1418,7 +1458,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) | |||
1418 | bm_print_lock_info(mdev); | 1458 | bm_print_lock_info(mdev); |
1419 | if (bitnr < b->bm_bits) { | 1459 | if (bitnr < b->bm_bits) { |
1420 | p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); | 1460 | p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, bitnr)); |
1421 | i = generic_test_le_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; | 1461 | i = generic_test_le_bit(bitnr & BITS_PER_PAGE_MASK, p_addr) ? 1 : 0; |
1422 | bm_unmap(p_addr); | 1462 | bm_unmap(p_addr); |
1423 | } else if (bitnr == b->bm_bits) { | 1463 | } else if (bitnr == b->bm_bits) { |
1424 | i = -1; | 1464 | i = -1; |
@@ -1517,13 +1557,15 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr) | |||
1517 | return count; | 1557 | return count; |
1518 | } | 1558 | } |
1519 | 1559 | ||
1520 | /* set all bits covered by the AL-extent al_enr */ | 1560 | /* Set all bits covered by the AL-extent al_enr. |
1561 | * Returns number of bits changed. */ | ||
1521 | unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | 1562 | unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) |
1522 | { | 1563 | { |
1523 | struct drbd_bitmap *b = mdev->bitmap; | 1564 | struct drbd_bitmap *b = mdev->bitmap; |
1524 | unsigned long *p_addr, *bm; | 1565 | unsigned long *p_addr, *bm; |
1525 | unsigned long weight; | 1566 | unsigned long weight; |
1526 | int count, s, e, i, do_now; | 1567 | unsigned long s, e; |
1568 | int count, i, do_now; | ||
1527 | ERR_IF(!b) return 0; | 1569 | ERR_IF(!b) return 0; |
1528 | ERR_IF(!b->bm_pages) return 0; | 1570 | ERR_IF(!b->bm_pages) return 0; |
1529 | 1571 | ||
@@ -1552,7 +1594,7 @@ unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr) | |||
1552 | if (e == b->bm_words) | 1594 | if (e == b->bm_words) |
1553 | b->bm_set -= bm_clear_surplus(b); | 1595 | b->bm_set -= bm_clear_surplus(b); |
1554 | } else { | 1596 | } else { |
1555 | dev_err(DEV, "start offset (%d) too large in drbd_bm_ALe_set_all\n", s); | 1597 | dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s); |
1556 | } | 1598 | } |
1557 | weight = b->bm_set - weight; | 1599 | weight = b->bm_set - weight; |
1558 | spin_unlock_irq(&b->bm_lock); | 1600 | spin_unlock_irq(&b->bm_lock); |
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 74cc50a21822..5a2d0ec72b34 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h | |||
@@ -1003,9 +1003,9 @@ struct drbd_conf { | |||
1003 | struct hlist_head *tl_hash; | 1003 | struct hlist_head *tl_hash; |
1004 | unsigned int tl_hash_s; | 1004 | unsigned int tl_hash_s; |
1005 | 1005 | ||
1006 | /* blocks to sync in this run [unit BM_BLOCK_SIZE] */ | 1006 | /* blocks to resync in this run [unit BM_BLOCK_SIZE] */ |
1007 | unsigned long rs_total; | 1007 | unsigned long rs_total; |
1008 | /* number of sync IOs that failed in this run */ | 1008 | /* number of resync blocks that failed in this run */ |
1009 | unsigned long rs_failed; | 1009 | unsigned long rs_failed; |
1010 | /* Syncer's start time [unit jiffies] */ | 1010 | /* Syncer's start time [unit jiffies] */ |
1011 | unsigned long rs_start; | 1011 | unsigned long rs_start; |
@@ -1399,7 +1399,9 @@ struct bm_extent { | |||
1399 | * you should use 64bit OS for that much storage, anyways. */ | 1399 | * you should use 64bit OS for that much storage, anyways. */ |
1400 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) | 1400 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0xffff7fff) |
1401 | #else | 1401 | #else |
1402 | #define DRBD_MAX_SECTORS_FLEX BM_BIT_TO_SECT(0x1LU << 32) | 1402 | /* we allow up to 1 PiB now on 64bit architecture with "flexible" meta data */ |
1403 | #define DRBD_MAX_SECTORS_FLEX (1UL << 51) | ||
1404 | /* corresponds to (1UL << 38) bits right now. */ | ||
1403 | #endif | 1405 | #endif |
1404 | #endif | 1406 | #endif |
1405 | 1407 | ||
@@ -1419,11 +1421,15 @@ extern int drbd_bm_resize(struct drbd_conf *mdev, sector_t sectors, int set_new | |||
1419 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); | 1421 | extern void drbd_bm_cleanup(struct drbd_conf *mdev); |
1420 | extern void drbd_bm_set_all(struct drbd_conf *mdev); | 1422 | extern void drbd_bm_set_all(struct drbd_conf *mdev); |
1421 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); | 1423 | extern void drbd_bm_clear_all(struct drbd_conf *mdev); |
1424 | /* set/clear/test only a few bits at a time */ | ||
1422 | extern int drbd_bm_set_bits( | 1425 | extern int drbd_bm_set_bits( |
1423 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | 1426 | struct drbd_conf *mdev, unsigned long s, unsigned long e); |
1424 | extern int drbd_bm_clear_bits( | 1427 | extern int drbd_bm_clear_bits( |
1425 | struct drbd_conf *mdev, unsigned long s, unsigned long e); | 1428 | struct drbd_conf *mdev, unsigned long s, unsigned long e); |
1426 | /* bm_set_bits variant for use while holding drbd_bm_lock */ | 1429 | extern int drbd_bm_count_bits( |
1430 | struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
1431 | /* bm_set_bits variant for use while holding drbd_bm_lock, | ||
1432 | * may process the whole bitmap in one go */ | ||
1427 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, | 1433 | extern void _drbd_bm_set_bits(struct drbd_conf *mdev, |
1428 | const unsigned long s, const unsigned long e); | 1434 | const unsigned long s, const unsigned long e); |
1429 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); | 1435 | extern int drbd_bm_test_bit(struct drbd_conf *mdev, unsigned long bitnr); |
@@ -1436,6 +1442,8 @@ extern unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, | |||
1436 | extern size_t drbd_bm_words(struct drbd_conf *mdev); | 1442 | extern size_t drbd_bm_words(struct drbd_conf *mdev); |
1437 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); | 1443 | extern unsigned long drbd_bm_bits(struct drbd_conf *mdev); |
1438 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); | 1444 | extern sector_t drbd_bm_capacity(struct drbd_conf *mdev); |
1445 | |||
1446 | #define DRBD_END_OF_BITMAP (~(unsigned long)0) | ||
1439 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | 1447 | extern unsigned long drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); |
1440 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ | 1448 | /* bm_find_next variants for use while you hold drbd_bm_lock() */ |
1441 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); | 1449 | extern unsigned long _drbd_bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo); |
@@ -1452,8 +1460,6 @@ extern void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, | |||
1452 | 1460 | ||
1453 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); | 1461 | extern void drbd_bm_lock(struct drbd_conf *mdev, char *why); |
1454 | extern void drbd_bm_unlock(struct drbd_conf *mdev); | 1462 | extern void drbd_bm_unlock(struct drbd_conf *mdev); |
1455 | |||
1456 | extern int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsigned long e); | ||
1457 | /* drbd_main.c */ | 1463 | /* drbd_main.c */ |
1458 | 1464 | ||
1459 | extern struct kmem_cache *drbd_request_cache; | 1465 | extern struct kmem_cache *drbd_request_cache; |
@@ -2158,10 +2164,8 @@ extern int _get_ldev_if_state(struct drbd_conf *mdev, enum drbd_disk_state mins) | |||
2158 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | 2164 | static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, |
2159 | unsigned long *bits_left, unsigned int *per_mil_done) | 2165 | unsigned long *bits_left, unsigned int *per_mil_done) |
2160 | { | 2166 | { |
2161 | /* | 2167 | /* this is to break it at compile time when we change that, in case we |
2162 | * this is to break it at compile time when we change that | 2168 | * want to support more than (1<<32) bits on a 32bit arch. */ |
2163 | * (we may feel 4TB maximum storage per drbd is not enough) | ||
2164 | */ | ||
2165 | typecheck(unsigned long, mdev->rs_total); | 2169 | typecheck(unsigned long, mdev->rs_total); |
2166 | 2170 | ||
2167 | /* note: both rs_total and rs_left are in bits, i.e. in | 2171 | /* note: both rs_total and rs_left are in bits, i.e. in |
@@ -2186,10 +2190,19 @@ static inline void drbd_get_syncer_progress(struct drbd_conf *mdev, | |||
2186 | *bits_left, mdev->rs_total, mdev->rs_failed); | 2190 | *bits_left, mdev->rs_total, mdev->rs_failed); |
2187 | *per_mil_done = 0; | 2191 | *per_mil_done = 0; |
2188 | } else { | 2192 | } else { |
2189 | /* make sure the calculation happens in long context */ | 2193 | /* Make sure the division happens in long context. |
2190 | unsigned long tmp = 1000UL - | 2194 | * We allow up to one petabyte storage right now, |
2191 | (*bits_left >> 10)*1000UL | 2195 | * at a granularity of 4k per bit that is 2**38 bits. |
2192 | / ((mdev->rs_total >> 10) + 1UL); | 2196 | * After shift right and multiplication by 1000, |
2197 | * this should still fit easily into a 32bit long, | ||
2198 | * so we don't need a 64bit division on 32bit arch. | ||
2199 | * Note: currently we don't support such large bitmaps on 32bit | ||
2200 | * arch anyways, but no harm done to be prepared for it here. | ||
2201 | */ | ||
2202 | unsigned int shift = mdev->rs_total >= (1ULL << 32) ? 16 : 10; | ||
2203 | unsigned long left = *bits_left >> shift; | ||
2204 | unsigned long total = 1UL + (mdev->rs_total >> shift); | ||
2205 | unsigned long tmp = 1000UL - left * 1000UL/total; | ||
2193 | *per_mil_done = tmp; | 2206 | *per_mil_done = tmp; |
2194 | } | 2207 | } |
2195 | } | 2208 | } |
diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 77dc022eaf6b..a46bc0287e21 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c | |||
@@ -527,17 +527,19 @@ static void drbd_md_set_sector_offsets(struct drbd_conf *mdev, | |||
527 | } | 527 | } |
528 | } | 528 | } |
529 | 529 | ||
530 | /* input size is expected to be in KB */ | ||
530 | char *ppsize(char *buf, unsigned long long size) | 531 | char *ppsize(char *buf, unsigned long long size) |
531 | { | 532 | { |
532 | /* Needs 9 bytes at max. */ | 533 | /* Needs 9 bytes at max including trailing NUL: |
534 | * -1ULL ==> "16384 EB" */ | ||
533 | static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; | 535 | static char units[] = { 'K', 'M', 'G', 'T', 'P', 'E' }; |
534 | int base = 0; | 536 | int base = 0; |
535 | while (size >= 10000) { | 537 | while (size >= 10000 && base < sizeof(units)-1) { |
536 | /* shift + round */ | 538 | /* shift + round */ |
537 | size = (size >> 10) + !!(size & (1<<9)); | 539 | size = (size >> 10) + !!(size & (1<<9)); |
538 | base++; | 540 | base++; |
539 | } | 541 | } |
540 | sprintf(buf, "%lu %cB", (long)size, units[base]); | 542 | sprintf(buf, "%u %cB", (unsigned)size, units[base]); |
541 | 543 | ||
542 | return buf; | 544 | return buf; |
543 | } | 545 | } |
diff --git a/drivers/block/drbd/drbd_proc.c b/drivers/block/drbd/drbd_proc.c index efba62cd2e58..2959cdfb77f5 100644 --- a/drivers/block/drbd/drbd_proc.c +++ b/drivers/block/drbd/drbd_proc.c | |||
@@ -91,9 +91,9 @@ static void drbd_syncer_progress(struct drbd_conf *mdev, struct seq_file *seq) | |||
91 | seq_printf(seq, "sync'ed:"); | 91 | seq_printf(seq, "sync'ed:"); |
92 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); | 92 | seq_printf(seq, "%3u.%u%% ", res / 10, res % 10); |
93 | 93 | ||
94 | /* if more than 1 GB display in MB */ | 94 | /* if more than a few GB, display in MB */ |
95 | if (mdev->rs_total > 0x100000L) | 95 | if (mdev->rs_total > (4UL << (30 - BM_BLOCK_SHIFT))) |
96 | seq_printf(seq, "(%lu/%lu)M\n\t", | 96 | seq_printf(seq, "(%lu/%lu)M", |
97 | (unsigned long) Bit2KB(rs_left >> 10), | 97 | (unsigned long) Bit2KB(rs_left >> 10), |
98 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); | 98 | (unsigned long) Bit2KB(mdev->rs_total >> 10)); |
99 | else | 99 | else |
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index d17f2ed777ce..be46084c254e 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c | |||
@@ -577,7 +577,7 @@ next_sector: | |||
577 | size = BM_BLOCK_SIZE; | 577 | size = BM_BLOCK_SIZE; |
578 | bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); | 578 | bit = drbd_bm_find_next(mdev, mdev->bm_resync_fo); |
579 | 579 | ||
580 | if (bit == -1UL) { | 580 | if (bit == DRBD_END_OF_BITMAP) { |
581 | mdev->bm_resync_fo = drbd_bm_bits(mdev); | 581 | mdev->bm_resync_fo = drbd_bm_bits(mdev); |
582 | mdev->resync_work.cb = w_resync_inactive; | 582 | mdev->resync_work.cb = w_resync_inactive; |
583 | put_ldev(mdev); | 583 | put_ldev(mdev); |