author | Lars Ellenberg <lars.ellenberg@linbit.com> | 2010-12-15 02:59:09 -0500 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2011-03-10 05:36:40 -0500 |
commit | 95a0f10cddbf93ce89c175ac1c53dad2d20ad309 (patch) | |
tree | 98866a5938a75f235fdce6ab20cfa95fe984388e | |
parent | 7777a8ba1fc980e5edfe492ebf5a1676497b8db2 (diff) |
drbd: store in-core bitmap little endian, regardless of architecture
Our on-disk bitmap is a little endian bitstream.
Up to now, we have stored the in-core copy of that in
native endian, applying byte order conversion when necessary.
Instead, keep the bitmap pages little endian, as they are read from disk,
and use the generic_*_le_bit family of functions.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
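The crux of the change: in a little-endian bitstream, bit N always lives in byte N/8 at bit position N%8 within that byte, so the in-core pages now have the same layout on every architecture and already match the on-disk format. A minimal userspace sketch of that addressing rule (the helpers test_le_bit()/set_le_bit() below are illustrative stand-ins, not the kernel's generic_*_le_bit() implementations):

```c
/* Little-endian bit addressing: bit N -> byte N/8, bit N%8.
 * The byte layout is identical on little- and big-endian hosts,
 * which is why no byte swapping is needed on the I/O path. */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

static int test_le_bit(unsigned long nr, const void *addr)
{
	const uint8_t *bytes = addr;
	return (bytes[nr / 8] >> (nr % 8)) & 1;
}

static void set_le_bit(unsigned long nr, void *addr)
{
	uint8_t *bytes = addr;
	bytes[nr / 8] |= 1u << (nr % 8);
}

int main(void)
{
	unsigned long bitmap[2];

	memset(bitmap, 0, sizeof(bitmap));
	set_le_bit(9, bitmap);		/* byte 1 becomes 0x02 on any host */
	printf("bit 9 set: %d, byte 1 = 0x%02x\n",
	       test_le_bit(9, bitmap), ((uint8_t *)bitmap)[1]);
	return 0;
}
```

With the previous native-endian storage, a big-endian host had to convert every long before writing the bitmap out and after reading it back in; with this layout the pages can be submitted to disk as-is.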
-rw-r--r-- | drivers/block/drbd/drbd_bitmap.c | 166 |
1 file changed, 84 insertions, 82 deletions
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index c5361487cf47..8d959ed6c2cc 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/kmap_types.h> | 30 | #include <asm/kmap_types.h> |
31 | #include "drbd_int.h" | 31 | #include "drbd_int.h" |
32 | 32 | ||
33 | |||
33 | /* OPAQUE outside this file! | 34 | /* OPAQUE outside this file! |
34 | * interface defined in drbd_int.h | 35 | * interface defined in drbd_int.h |
35 | 36 | ||
@@ -154,6 +155,14 @@ void drbd_bm_unlock(struct drbd_conf *mdev) | |||
154 | mutex_unlock(&b->bm_change); | 155 | mutex_unlock(&b->bm_change); |
155 | } | 156 | } |
156 | 157 | ||
158 | static unsigned int bm_bit_to_page_idx(struct drbd_bitmap *b, u64 bitnr) | ||
159 | { | ||
160 | /* page_nr = (bitnr/8) >> PAGE_SHIFT; */ | ||
161 | unsigned int page_nr = bitnr >> (PAGE_SHIFT + 3); | ||
162 | BUG_ON(page_nr >= b->bm_number_of_pages); | ||
163 | return page_nr; | ||
164 | } | ||
165 | |||
157 | /* word offset to long pointer */ | 166 | /* word offset to long pointer */ |
158 | static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) | 167 | static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset, const enum km_type km) |
159 | { | 168 | { |
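A note on the new bm_bit_to_page_idx() helper above: each bitmap page holds PAGE_SIZE * 8 bits, so the page index is simply bitnr >> (PAGE_SHIFT + 3). With the common 4 KiB page size (PAGE_SHIFT = 12) a page covers 2^15 = 32768 bits, so for example bit 100000 lives on page 100000 >> 15 = 3.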
@@ -168,6 +177,17 @@ static unsigned long *__bm_map_paddr(struct drbd_bitmap *b, unsigned long offset | |||
168 | return (unsigned long *) kmap_atomic(page, km); | 177 | return (unsigned long *) kmap_atomic(page, km); |
169 | } | 178 | } |
170 | 179 | ||
180 | static unsigned long *__bm_map_pidx(struct drbd_bitmap *b, unsigned int idx, const enum km_type km) | ||
181 | { | ||
182 | struct page *page = b->bm_pages[idx]; | ||
183 | return (unsigned long *) kmap_atomic(page, km); | ||
184 | } | ||
185 | |||
186 | static unsigned long *bm_map_pidx(struct drbd_bitmap *b, unsigned int idx) | ||
187 | { | ||
188 | return __bm_map_pidx(b, idx, KM_IRQ1); | ||
189 | } | ||
190 | |||
171 | static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) | 191 | static unsigned long * bm_map_paddr(struct drbd_bitmap *b, unsigned long offset) |
172 | { | 192 | { |
173 | return __bm_map_paddr(b, offset, KM_IRQ1); | 193 | return __bm_map_paddr(b, offset, KM_IRQ1); |
@@ -329,22 +349,42 @@ void drbd_bm_cleanup(struct drbd_conf *mdev) | |||
329 | * this masks out the remaining bits. | 349 | * this masks out the remaining bits. |
330 | * Returns the number of bits cleared. | 350 | * Returns the number of bits cleared. |
331 | */ | 351 | */ |
352 | #define BITS_PER_PAGE (1UL << (PAGE_SHIFT + 3)) | ||
353 | #define BITS_PER_PAGE_MASK (BITS_PER_PAGE - 1) | ||
354 | #define BITS_PER_LONG_MASK (BITS_PER_LONG - 1) | ||
332 | static int bm_clear_surplus(struct drbd_bitmap *b) | 355 | static int bm_clear_surplus(struct drbd_bitmap *b) |
333 | { | 356 | { |
334 | const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; | 357 | unsigned long mask; |
335 | size_t w = b->bm_bits >> LN2_BPL; | ||
336 | int cleared = 0; | ||
337 | unsigned long *p_addr, *bm; | 358 | unsigned long *p_addr, *bm; |
359 | int tmp; | ||
360 | int cleared = 0; | ||
338 | 361 | ||
339 | p_addr = bm_map_paddr(b, w); | 362 | /* number of bits modulo bits per page */ |
340 | bm = p_addr + MLPP(w); | 363 | tmp = (b->bm_bits & BITS_PER_PAGE_MASK); |
341 | if (w < b->bm_words) { | 364 | /* mask the used bits of the word containing the last bit */ |
365 | mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; | ||
366 | /* bitmap is always stored little endian, | ||
367 | * on disk and in core memory alike */ | ||
368 | mask = cpu_to_lel(mask); | ||
369 | |||
370 | /* because of the "extra long to catch oob access" we allocate in | ||
371 | * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page | ||
372 | * containing the last _relevant_ bitmap word */ | ||
373 | p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, b->bm_bits - 1)); | ||
374 | bm = p_addr + (tmp/BITS_PER_LONG); | ||
375 | if (mask) { | ||
376 | /* If mask != 0, we are not exactly aligned, so bm now points | ||
377 | * to the long containing the last bit. | ||
378 | * If mask == 0, bm already points to the word immediately | ||
379 | * after the last (long word aligned) bit. */ | ||
342 | cleared = hweight_long(*bm & ~mask); | 380 | cleared = hweight_long(*bm & ~mask); |
343 | *bm &= mask; | 381 | *bm &= mask; |
344 | w++; bm++; | 382 | bm++; |
345 | } | 383 | } |
346 | 384 | ||
347 | if (w < b->bm_words) { | 385 | if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { |
386 | /* on a 32bit arch, we may need to zero out | ||
387 | * a padding long to align with a 64bit remote */ | ||
348 | cleared += hweight_long(*bm); | 388 | cleared += hweight_long(*bm); |
349 | *bm = 0; | 389 | *bm = 0; |
350 | } | 390 | } |
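The surplus-bit handling now has to respect the little-endian storage: the mask of valid bits in the last word is built in CPU order and converted with cpu_to_lel() before being applied to the LE-stored word. A rough userspace sketch of the clearing case, assuming 64-bit longs and using glibc's htole64() and GCC's __builtin_popcountll() as stand-ins for the kernel's cpu_to_lel() and hweight_long():

```c
#include <endian.h>
#include <stdint.h>
#include <stdio.h>

/* Clear the bits beyond 'nbits' in the last word of a little-endian
 * bitmap word array; returns how many set bits were cleared. */
static int clear_surplus(uint64_t *last_word, unsigned long nbits)
{
	unsigned long used = nbits % 64;            /* valid bits in last word */
	uint64_t mask = used ? (1ULL << used) - 1   /* mask in CPU order...    */
			     : ~0ULL;               /* (nothing is surplus)    */
	uint64_t le_mask = htole64(mask);           /* ...in LE storage order  */
	int cleared = __builtin_popcountll(*last_word & ~le_mask);

	*last_word &= le_mask;
	return cleared;
}

int main(void)
{
	uint64_t w = ~0ULL;	/* pretend every bit of the last word is set */

	/* a 100-bit bitmap uses 36 bits of its second word: 28 are surplus */
	printf("cleared %d surplus bits\n", clear_surplus(&w, 100));
	return 0;
}
```

bm_set_surplus() below is the mirror image: it ORs the surplus bits back in (*bm |= ~mask) instead of clearing them.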
@@ -354,24 +394,41 @@ static int bm_clear_surplus(struct drbd_bitmap *b) | |||
354 | 394 | ||
355 | static void bm_set_surplus(struct drbd_bitmap *b) | 395 | static void bm_set_surplus(struct drbd_bitmap *b) |
356 | { | 396 | { |
357 | const unsigned long mask = (1UL << (b->bm_bits & (BITS_PER_LONG-1))) - 1; | 397 | unsigned long mask; |
358 | size_t w = b->bm_bits >> LN2_BPL; | ||
359 | unsigned long *p_addr, *bm; | 398 | unsigned long *p_addr, *bm; |
360 | 399 | int tmp; | |
361 | p_addr = bm_map_paddr(b, w); | 400 | |
362 | bm = p_addr + MLPP(w); | 401 | /* number of bits modulo bits per page */ |
363 | if (w < b->bm_words) { | 402 | tmp = (b->bm_bits & BITS_PER_PAGE_MASK); |
403 | /* mask the used bits of the word containing the last bit */ | ||
404 | mask = (1UL << (tmp & BITS_PER_LONG_MASK)) -1; | ||
405 | /* bitmap is always stored little endian, | ||
406 | * on disk and in core memory alike */ | ||
407 | mask = cpu_to_lel(mask); | ||
408 | |||
409 | /* because of the "extra long to catch oob access" we allocate in | ||
410 | * drbd_bm_resize, bm_number_of_pages -1 is not necessarily the page | ||
411 | * containing the last _relevant_ bitmap word */ | ||
412 | p_addr = bm_map_pidx(b, bm_bit_to_page_idx(b, b->bm_bits - 1)); | ||
413 | bm = p_addr + (tmp/BITS_PER_LONG); | ||
414 | if (mask) { | ||
415 | /* If mask != 0, we are not exactly aligned, so bm now points | ||
416 | * to the long containing the last bit. | ||
417 | * If mask == 0, bm already points to the word immediately | ||
418 | * after the last (long word aligned) bit. */ | ||
364 | *bm |= ~mask; | 419 | *bm |= ~mask; |
365 | bm++; w++; | 420 | bm++; |
366 | } | 421 | } |
367 | 422 | ||
368 | if (w < b->bm_words) { | 423 | if (BITS_PER_LONG == 32 && ((bm - p_addr) & 1) == 1) { |
369 | *bm = ~(0UL); | 424 | /* on a 32bit arch, we may need to zero out |
425 | * a padding long to align with a 64bit remote */ | ||
426 | *bm = ~0UL; | ||
370 | } | 427 | } |
371 | bm_unmap(p_addr); | 428 | bm_unmap(p_addr); |
372 | } | 429 | } |
373 | 430 | ||
374 | static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endian) | 431 | static unsigned long bm_count_bits(struct drbd_bitmap *b) |
375 | { | 432 | { |
376 | unsigned long *p_addr, *bm, offset = 0; | 433 | unsigned long *p_addr, *bm, offset = 0; |
377 | unsigned long bits = 0; | 434 | unsigned long bits = 0; |
@@ -389,10 +446,6 @@ static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endia | |||
389 | p_addr = __bm_map_paddr(b, offset, KM_USER0); | 446 | p_addr = __bm_map_paddr(b, offset, KM_USER0); |
390 | bm = p_addr + MLPP(offset); | 447 | bm = p_addr + MLPP(offset); |
391 | while (i--) { | 448 | while (i--) { |
392 | #ifndef __LITTLE_ENDIAN | ||
393 | if (swap_endian) | ||
394 | *bm = lel_to_cpu(*bm); | ||
395 | #endif | ||
396 | bits += hweight_long(*bm++); | 449 | bits += hweight_long(*bm++); |
397 | } | 450 | } |
398 | offset += do_now; | 451 | offset += do_now; |
@@ -415,16 +468,6 @@ static unsigned long __bm_count_bits(struct drbd_bitmap *b, const int swap_endia | |||
415 | return bits; | 468 | return bits; |
416 | } | 469 | } |
417 | 470 | ||
418 | static unsigned long bm_count_bits(struct drbd_bitmap *b) | ||
419 | { | ||
420 | return __bm_count_bits(b, 0); | ||
421 | } | ||
422 | |||
423 | static unsigned long bm_count_bits_swap_endian(struct drbd_bitmap *b) | ||
424 | { | ||
425 | return __bm_count_bits(b, 1); | ||
426 | } | ||
427 | |||
428 | /* offset and len in long words.*/ | 471 | /* offset and len in long words.*/ |
429 | static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) | 472 | static void bm_memset(struct drbd_bitmap *b, size_t offset, int c, size_t len) |
430 | { | 473 | { |
@@ -662,7 +705,7 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
662 | offset += do_now; | 705 | offset += do_now; |
663 | while (do_now--) { | 706 | while (do_now--) { |
664 | bits = hweight_long(*bm); | 707 | bits = hweight_long(*bm); |
665 | word = *bm | lel_to_cpu(*buffer++); | 708 | word = *bm | *buffer++; |
666 | *bm++ = word; | 709 | *bm++ = word; |
667 | b->bm_set += hweight_long(word) - bits; | 710 | b->bm_set += hweight_long(word) - bits; |
668 | } | 711 | } |
@@ -709,7 +752,7 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number, | |||
709 | bm = p_addr + MLPP(offset); | 752 | bm = p_addr + MLPP(offset); |
710 | offset += do_now; | 753 | offset += do_now; |
711 | while (do_now--) | 754 | while (do_now--) |
712 | *buffer++ = cpu_to_lel(*bm++); | 755 | *buffer++ = *bm++; |
713 | bm_unmap(p_addr); | 756 | bm_unmap(p_addr); |
714 | } | 757 | } |
715 | } | 758 | } |
@@ -795,39 +838,6 @@ static void bm_page_io_async(struct drbd_conf *mdev, struct drbd_bitmap *b, int | |||
795 | } | 838 | } |
796 | } | 839 | } |
797 | 840 | ||
798 | # if defined(__LITTLE_ENDIAN) | ||
799 | /* nothing to do, on disk == in memory */ | ||
800 | # define bm_cpu_to_lel(x) ((void)0) | ||
801 | # else | ||
802 | static void bm_cpu_to_lel(struct drbd_bitmap *b) | ||
803 | { | ||
804 | /* need to cpu_to_lel all the pages ... | ||
805 | * this may be optimized by using | ||
806 | * cpu_to_lel(-1) == -1 and cpu_to_lel(0) == 0; | ||
807 | * the following is still not optimal, but better than nothing */ | ||
808 | unsigned int i; | ||
809 | unsigned long *p_addr, *bm; | ||
810 | if (b->bm_set == 0) { | ||
811 | /* no page at all; avoid swap if all is 0 */ | ||
812 | i = b->bm_number_of_pages; | ||
813 | } else if (b->bm_set == b->bm_bits) { | ||
814 | /* only the last page */ | ||
815 | i = b->bm_number_of_pages - 1; | ||
816 | } else { | ||
817 | /* all pages */ | ||
818 | i = 0; | ||
819 | } | ||
820 | for (; i < b->bm_number_of_pages; i++) { | ||
821 | p_addr = kmap_atomic(b->bm_pages[i], KM_USER0); | ||
822 | for (bm = p_addr; bm < p_addr + PAGE_SIZE/sizeof(long); bm++) | ||
823 | *bm = cpu_to_lel(*bm); | ||
824 | kunmap_atomic(p_addr, KM_USER0); | ||
825 | } | ||
826 | } | ||
827 | # endif | ||
828 | /* lel_to_cpu == cpu_to_lel */ | ||
829 | # define bm_lel_to_cpu(x) bm_cpu_to_lel(x) | ||
830 | |||
831 | /* | 841 | /* |
832 | * bm_rw: read/write the whole bitmap from/to its on disk location. | 842 | * bm_rw: read/write the whole bitmap from/to its on disk location. |
833 | */ | 843 | */ |
@@ -847,10 +857,6 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) | |||
847 | bm_words = drbd_bm_words(mdev); | 857 | bm_words = drbd_bm_words(mdev); |
848 | num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; | 858 | num_pages = (bm_words*sizeof(long) + PAGE_SIZE-1) >> PAGE_SHIFT; |
849 | 859 | ||
850 | /* on disk bitmap is little endian */ | ||
851 | if (rw == WRITE) | ||
852 | bm_cpu_to_lel(b); | ||
853 | |||
854 | now = jiffies; | 860 | now = jiffies; |
855 | atomic_set(&b->bm_async_io, num_pages); | 861 | atomic_set(&b->bm_async_io, num_pages); |
856 | __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); | 862 | __clear_bit(BM_MD_IO_ERROR, &b->bm_flags); |
@@ -869,13 +875,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw) __must_hold(local) | |||
869 | 875 | ||
870 | now = jiffies; | 876 | now = jiffies; |
871 | if (rw == WRITE) { | 877 | if (rw == WRITE) { |
872 | /* swap back endianness */ | ||
873 | bm_lel_to_cpu(b); | ||
874 | /* flush bitmap to stable storage */ | ||
875 | drbd_md_flush(mdev); | 878 | drbd_md_flush(mdev); |
876 | } else /* rw == READ */ { | 879 | } else /* rw == READ */ { |
877 | /* just read, if necessary adjust endianness */ | 880 | b->bm_set = bm_count_bits(b); |
878 | b->bm_set = bm_count_bits_swap_endian(b); | ||
879 | dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", | 881 | dev_info(DEV, "recounting of set bits took additional %lu jiffies\n", |
880 | jiffies - now); | 882 | jiffies - now); |
881 | } | 883 | } |
@@ -969,9 +971,9 @@ static unsigned long __bm_find_next(struct drbd_conf *mdev, unsigned long bm_fo, | |||
969 | p_addr = __bm_map_paddr(b, offset, km); | 971 | p_addr = __bm_map_paddr(b, offset, km); |
970 | 972 | ||
971 | if (find_zero_bit) | 973 | if (find_zero_bit) |
972 | i = find_next_zero_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 974 | i = generic_find_next_zero_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); |
973 | else | 975 | else |
974 | i = find_next_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); | 976 | i = generic_find_next_le_bit(p_addr, PAGE_SIZE*8, bm_fo & BPP_MASK); |
975 | 977 | ||
976 | __bm_unmap(p_addr, km); | 978 | __bm_unmap(p_addr, km); |
977 | if (i < PAGE_SIZE*8) { | 979 | if (i < PAGE_SIZE*8) { |
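The search path here switches from find_next_bit()/find_next_zero_bit(), which walk an array of native-endian longs, to the generic_find_next_*_le_bit() variants, which treat the buffer as a little-endian bitstream. A byte-wise userspace sketch of the set-bit search (the name and loop are illustrative only; the real helpers work a long at a time), using the same "return the size on a miss" convention the caller checks with if (i < PAGE_SIZE*8):

```c
#include <stddef.h>
#include <stdio.h>

/* Find the next set bit at or after 'start' in a little-endian
 * bitstream of 'size' bits; returns 'size' if none is found. */
static size_t find_next_le_bit(const void *addr, size_t size, size_t start)
{
	const unsigned char *bytes = addr;
	size_t nr;

	for (nr = start; nr < size; nr++)
		if (bytes[nr / 8] & (1u << (nr % 8)))
			return nr;
	return size;
}

int main(void)
{
	unsigned char bits[8] = { 0 };

	bits[2] = 0x10;		/* set bit 20 (byte 2, bit 4) */
	printf("next set bit from 0: %zu\n", find_next_le_bit(bits, 64, 0));
	return 0;
}
```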
@@ -1064,9 +1066,9 @@ static int __bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s, | |||
1064 | last_page_nr = page_nr; | 1066 | last_page_nr = page_nr; |
1065 | } | 1067 | } |
1066 | if (val) | 1068 | if (val) |
1067 | c += (0 == __test_and_set_bit(bitnr & BPP_MASK, p_addr)); | 1069 | c += (0 == generic___test_and_set_le_bit(bitnr & BPP_MASK, p_addr)); |
1068 | else | 1070 | else |
1069 | c -= (0 != __test_and_clear_bit(bitnr & BPP_MASK, p_addr)); | 1071 | c -= (0 != generic___test_and_clear_le_bit(bitnr & BPP_MASK, p_addr)); |
1070 | } | 1072 | } |
1071 | if (p_addr) | 1073 | if (p_addr) |
1072 | __bm_unmap(p_addr, km); | 1074 | __bm_unmap(p_addr, km); |
@@ -1211,7 +1213,7 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr) | |||
1211 | if (bitnr < b->bm_bits) { | 1213 | if (bitnr < b->bm_bits) { |
1212 | unsigned long offset = bitnr>>LN2_BPL; | 1214 | unsigned long offset = bitnr>>LN2_BPL; |
1213 | p_addr = bm_map_paddr(b, offset); | 1215 | p_addr = bm_map_paddr(b, offset); |
1214 | i = test_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; | 1216 | i = generic_test_le_bit(bitnr & BPP_MASK, p_addr) ? 1 : 0; |
1215 | bm_unmap(p_addr); | 1217 | bm_unmap(p_addr); |
1216 | } else if (bitnr == b->bm_bits) { | 1218 | } else if (bitnr == b->bm_bits) { |
1217 | i = -1; | 1219 | i = -1; |
@@ -1255,7 +1257,7 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi | |||
1255 | ERR_IF (bitnr >= b->bm_bits) { | 1257 | ERR_IF (bitnr >= b->bm_bits) { |
1256 | dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); | 1258 | dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits); |
1257 | } else { | 1259 | } else { |
1258 | c += (0 != test_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); | 1260 | c += (0 != generic_test_le_bit(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr)); |
1259 | } | 1261 | } |
1260 | } | 1262 | } |
1261 | if (p_addr) | 1263 | if (p_addr) |