author		Vishal Verma <vishal.l.verma@intel.com>		2015-12-24 21:20:34 -0500
committer	Dan Williams <dan.j.williams@intel.com>		2016-01-09 11:39:03 -0500
commit		fc974ee2bffdde47d1e4b220cf326952cc2c4794 (patch)
tree		8d6e2d471fbd49ef1b1d7e2a31a0785dd0be9afc /drivers/md
parent		99e6608c9e7414ae4f2168df8bf8fae3eb49e41f (diff)
md: convert to use the generic badblocks code
Retain badblocks as part of rdev, but use the accessor functions from
include/linux/badblocks for all manipulation.

Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
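The conversion maps the old md-private helpers onto the generic API one-for-one:
md_is_badblock() becomes badblocks_check(), md_set_badblocks() becomes
badblocks_set(), md_clear_badblocks() becomes badblocks_clear(), and
md_ack_all_badblocks() becomes ack_all_badblocks(), with badblocks_init() and
badblocks_free() replacing the open-coded setup and teardown. A minimal sketch
of the resulting call pattern follows; the helper name demo_badblocks_usage()
is invented for illustration, and the meaning of badblocks_init()'s second
argument (nonzero enables the table immediately, while md passes 0 and enables
it later from the metadata handler) is an assumption inferred from the code
this patch deletes. The return conventions are taken from the call sites in
the diff: badblocks_set() returns 0 on success (the old helper returned 1),
and badblocks_check() returns 0, 1, or -1 exactly as md_is_badblock() did.

#include <linux/badblocks.h>

/* demo_badblocks_usage() is a made-up illustration, not part of the patch. */
static int demo_badblocks_usage(struct badblocks *bb)
{
        sector_t first_bad;
        int bad_sectors;
        int rv;

        /* Allocate the table.  Assumed: a nonzero second argument enables
         * it at once; md itself passes 0, mirroring the old "shift = -1,
         * disabled until explicitly enabled" initialisation below. */
        rv = badblocks_init(bb, 1);
        if (rv)
                return rv;

        /* Record 8 bad sectors at 4096, marked acknowledged.  Note the
         * inverted convention: 0 means success, where md_set_badblocks()
         * returned 1 on success. */
        if (badblocks_set(bb, 4096, 8, 1))
                goto out;

        /* 0: range is clean; 1: bad blocks, all acknowledged;
         * -1: some not yet acknowledged in metadata.  Overlap details
         * come back through first_bad/bad_sectors. */
        rv = badblocks_check(bb, 4090, 16, &first_bad, &bad_sectors);

        /* Acknowledge every recorded range (a no-op while ->changed is set). */
        ack_all_badblocks(bb);

        /* Drop the range again, e.g. after a successful overwrite. */
        badblocks_clear(bb, 4096, 8);
out:
        badblocks_free(bb);
        return rv;
}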
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/md.c	516
-rw-r--r--	drivers/md/md.h	40
2 files changed, 28 insertions(+), 528 deletions(-)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 807095f4c793..1e48aa9de352 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -34,6 +34,7 @@
 
 #include <linux/kthread.h>
 #include <linux/blkdev.h>
+#include <linux/badblocks.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
 #include <linux/fs.h>
@@ -709,8 +710,7 @@ void md_rdev_clear(struct md_rdev *rdev)
                 put_page(rdev->bb_page);
                 rdev->bb_page = NULL;
         }
-        kfree(rdev->badblocks.page);
-        rdev->badblocks.page = NULL;
+        badblocks_free(&rdev->badblocks);
 }
 EXPORT_SYMBOL_GPL(md_rdev_clear);
 
@@ -1360,8 +1360,6 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
         return cpu_to_le32(csum);
 }
 
-static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
-                            int acknowledged);
 static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 {
         struct mdp_superblock_1 *sb;
@@ -1486,8 +1484,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
                         count <<= sb->bblog_shift;
                         if (bb + 1 == 0)
                                 break;
-                        if (md_set_badblocks(&rdev->badblocks,
-                                             sector, count, 1) == 0)
+                        if (badblocks_set(&rdev->badblocks, sector, count, 1))
                                 return -EINVAL;
                 }
         } else if (sb->bblog_offset != 0)
@@ -2319,7 +2316,7 @@ repeat:
         rdev_for_each(rdev, mddev) {
                 if (rdev->badblocks.changed) {
                         rdev->badblocks.changed = 0;
-                        md_ack_all_badblocks(&rdev->badblocks);
+                        ack_all_badblocks(&rdev->badblocks);
                         md_error(mddev, rdev);
                 }
                 clear_bit(Blocked, &rdev->flags);
@@ -2445,7 +2442,7 @@ repeat:
                 clear_bit(Blocked, &rdev->flags);
 
                 if (any_badblocks_changed)
-                        md_ack_all_badblocks(&rdev->badblocks);
+                        ack_all_badblocks(&rdev->badblocks);
                 clear_bit(BlockedBadBlocks, &rdev->flags);
                 wake_up(&rdev->blocked_wait);
         }
@@ -3046,11 +3043,17 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
 static struct rdev_sysfs_entry rdev_recovery_start =
 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
 
-static ssize_t
-badblocks_show(struct badblocks *bb, char *page, int unack);
-static ssize_t
-badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
-
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad-blocks' lists sector numbers and lengths of ranges that
+ *  are recorded as bad.  The list is truncated to fit within
+ *  the one-page limit of sysfs.
+ *  Writing "sector length" to this file adds an acknowledged
+ *  bad block list.
+ * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
+ *  been acknowledged.  Writing to this file adds bad blocks
+ *  without acknowledging them.  This is largely for testing.
+ */
 static ssize_t bb_show(struct md_rdev *rdev, char *page)
 {
         return badblocks_show(&rdev->badblocks, page, 0);
@@ -3165,14 +3168,7 @@ int md_rdev_init(struct md_rdev *rdev)
          * This reserves the space even on arrays where it cannot
          * be used - I wonder if that matters
          */
-        rdev->badblocks.count = 0;
-        rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
-        rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
-        seqlock_init(&rdev->badblocks.lock);
-        if (rdev->badblocks.page == NULL)
-                return -ENOMEM;
-
-        return 0;
+        return badblocks_init(&rdev->badblocks, 0);
 }
 EXPORT_SYMBOL_GPL(md_rdev_init);
 /*
@@ -8478,254 +8474,9 @@ void md_finish_reshape(struct mddev *mddev)
 }
 EXPORT_SYMBOL(md_finish_reshape);
 
-/* Bad block management.
- * We can record which blocks on each device are 'bad' and so just
- * fail those blocks, or that stripe, rather than the whole device.
- * Entries in the bad-block table are 64bits wide.  This comprises:
- * Length of bad-range, in sectors: 0-511 for lengths 1-512
- * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
- *  A 'shift' can be set so that larger blocks are tracked and
- *  consequently larger devices can be covered.
- * 'Acknowledged' flag - 1 bit. - the most significant bit.
- *
- * Locking of the bad-block table uses a seqlock so md_is_badblock
- * might need to retry if it is very unlucky.
- * We will sometimes want to check for bad blocks in a bi_end_io function,
- * so we use the write_seqlock_irq variant.
- *
- * When looking for a bad block we specify a range and want to
- * know if any block in the range is bad.  So we binary-search
- * to the last range that starts at-or-before the given endpoint,
- * (or "before the sector after the target range")
- * then see if it ends after the given start.
- * We return
- *  0 if there are no known bad blocks in the range
- *  1 if there are known bad blocks which are all acknowledged
- * -1 if there are bad blocks which have not yet been acknowledged in metadata.
- * plus the start/length of the first bad section we overlap.
- */
-int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
-                   sector_t *first_bad, int *bad_sectors)
-{
-        int hi;
-        int lo;
-        u64 *p = bb->page;
-        int rv;
-        sector_t target = s + sectors;
-        unsigned seq;
-
-        if (bb->shift > 0) {
-                /* round the start down, and the end up */
-                s >>= bb->shift;
-                target += (1<<bb->shift) - 1;
-                target >>= bb->shift;
-                sectors = target - s;
-        }
-        /* 'target' is now the first block after the bad range */
-
-retry:
-        seq = read_seqbegin(&bb->lock);
-        lo = 0;
-        rv = 0;
-        hi = bb->count;
-
-        /* Binary search between lo and hi for 'target'
-         * i.e. for the last range that starts before 'target'
-         */
-        /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
-         * are known not to be the last range before target.
-         * VARIANT: hi-lo is the number of possible
-         * ranges, and decreases until it reaches 1
-         */
-        while (hi - lo > 1) {
-                int mid = (lo + hi) / 2;
-                sector_t a = BB_OFFSET(p[mid]);
-                if (a < target)
-                        /* This could still be the one, earlier ranges
-                         * could not. */
-                        lo = mid;
-                else
-                        /* This and later ranges are definitely out. */
-                        hi = mid;
-        }
-        /* 'lo' might be the last that started before target, but 'hi' isn't */
-        if (hi > lo) {
-                /* need to check all ranges that end after 's' to see if
-                 * any are unacknowledged.
-                 */
-                while (lo >= 0 &&
-                       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
-                        if (BB_OFFSET(p[lo]) < target) {
-                                /* starts before the end, and finishes after
-                                 * the start, so they must overlap
-                                 */
-                                if (rv != -1 && BB_ACK(p[lo]))
-                                        rv = 1;
-                                else
-                                        rv = -1;
-                                *first_bad = BB_OFFSET(p[lo]);
-                                *bad_sectors = BB_LEN(p[lo]);
-                        }
-                        lo--;
-                }
-        }
-
-        if (read_seqretry(&bb->lock, seq))
-                goto retry;
-
-        return rv;
-}
-EXPORT_SYMBOL_GPL(md_is_badblock);
-
-/*
- * Add a range of bad blocks to the table.
- * This might extend the table, or might contract it
- * if two adjacent ranges can be merged.
- * We binary-search to find the 'insertion' point, then
- * decide how best to handle it.
- */
-static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
-                            int acknowledged)
-{
-        u64 *p;
-        int lo, hi;
-        int rv = 1;
-        unsigned long flags;
-
-        if (bb->shift < 0)
-                /* badblocks are disabled */
-                return 0;
-
-        if (bb->shift) {
-                /* round the start down, and the end up */
-                sector_t next = s + sectors;
-                s >>= bb->shift;
-                next += (1<<bb->shift) - 1;
-                next >>= bb->shift;
-                sectors = next - s;
-        }
-
-        write_seqlock_irqsave(&bb->lock, flags);
-
-        p = bb->page;
-        lo = 0;
-        hi = bb->count;
-        /* Find the last range that starts at-or-before 's' */
-        while (hi - lo > 1) {
-                int mid = (lo + hi) / 2;
-                sector_t a = BB_OFFSET(p[mid]);
-                if (a <= s)
-                        lo = mid;
-                else
-                        hi = mid;
-        }
-        if (hi > lo && BB_OFFSET(p[lo]) > s)
-                hi = lo;
-
-        if (hi > lo) {
-                /* we found a range that might merge with the start
-                 * of our new range
-                 */
-                sector_t a = BB_OFFSET(p[lo]);
-                sector_t e = a + BB_LEN(p[lo]);
-                int ack = BB_ACK(p[lo]);
-                if (e >= s) {
-                        /* Yes, we can merge with a previous range */
-                        if (s == a && s + sectors >= e)
-                                /* new range covers old */
-                                ack = acknowledged;
-                        else
-                                ack = ack && acknowledged;
-
-                        if (e < s + sectors)
-                                e = s + sectors;
-                        if (e - a <= BB_MAX_LEN) {
-                                p[lo] = BB_MAKE(a, e-a, ack);
-                                s = e;
-                        } else {
-                                /* does not all fit in one range,
-                                 * make p[lo] maximal
-                                 */
-                                if (BB_LEN(p[lo]) != BB_MAX_LEN)
-                                        p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
-                                s = a + BB_MAX_LEN;
-                        }
-                        sectors = e - s;
-                }
-        }
-        if (sectors && hi < bb->count) {
-                /* 'hi' points to the first range that starts after 's'.
-                 * Maybe we can merge with the start of that range */
-                sector_t a = BB_OFFSET(p[hi]);
-                sector_t e = a + BB_LEN(p[hi]);
-                int ack = BB_ACK(p[hi]);
-                if (a <= s + sectors) {
-                        /* merging is possible */
-                        if (e <= s + sectors) {
-                                /* full overlap */
-                                e = s + sectors;
-                                ack = acknowledged;
-                        } else
-                                ack = ack && acknowledged;
-
-                        a = s;
-                        if (e - a <= BB_MAX_LEN) {
-                                p[hi] = BB_MAKE(a, e-a, ack);
-                                s = e;
-                        } else {
-                                p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
-                                s = a + BB_MAX_LEN;
-                        }
-                        sectors = e - s;
-                        lo = hi;
-                        hi++;
-                }
-        }
-        if (sectors == 0 && hi < bb->count) {
-                /* we might be able to combine lo and hi */
-                /* Note: 's' is at the end of 'lo' */
-                sector_t a = BB_OFFSET(p[hi]);
-                int lolen = BB_LEN(p[lo]);
-                int hilen = BB_LEN(p[hi]);
-                int newlen = lolen + hilen - (s - a);
-                if (s >= a && newlen < BB_MAX_LEN) {
-                        /* yes, we can combine them */
-                        int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
-                        p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
-                        memmove(p + hi, p + hi + 1,
-                                (bb->count - hi - 1) * 8);
-                        bb->count--;
-                }
-        }
-        while (sectors) {
-                /* didn't merge (it all).
-                 * Need to add a range just before 'hi' */
-                if (bb->count >= MD_MAX_BADBLOCKS) {
-                        /* No room for more */
-                        rv = 0;
-                        break;
-                } else {
-                        int this_sectors = sectors;
-                        memmove(p + hi + 1, p + hi,
-                                (bb->count - hi) * 8);
-                        bb->count++;
-
-                        if (this_sectors > BB_MAX_LEN)
-                                this_sectors = BB_MAX_LEN;
-                        p[hi] = BB_MAKE(s, this_sectors, acknowledged);
-                        sectors -= this_sectors;
-                        s += this_sectors;
-                }
-        }
-
-        bb->changed = 1;
-        if (!acknowledged)
-                bb->unacked_exist = 1;
-        write_sequnlock_irqrestore(&bb->lock, flags);
-
-        return rv;
-}
+/* Bad block management */
 
+/* Returns 1 on success, 0 on failure */
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                        int is_new)
 {
@@ -8734,114 +8485,19 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                 s += rdev->new_data_offset;
         else
                 s += rdev->data_offset;
-        rv = md_set_badblocks(&rdev->badblocks,
-                              s, sectors, 0);
-        if (rv) {
+        rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
+        if (rv == 0) {
                 /* Make sure they get written out promptly */
                 sysfs_notify_dirent_safe(rdev->sysfs_state);
                 set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
                 set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
                 md_wakeup_thread(rdev->mddev->thread);
-        }
-        return rv;
+                return 1;
+        } else
+                return 0;
 }
 EXPORT_SYMBOL_GPL(rdev_set_badblocks);
 
-/*
- * Remove a range of bad blocks from the table.
- * This may involve extending the table if we split a region,
- * but it must not fail.  So if the table becomes full, we just
- * drop the remove request.
- */
-static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
-{
-        u64 *p;
-        int lo, hi;
-        sector_t target = s + sectors;
-        int rv = 0;
-
-        if (bb->shift > 0) {
-                /* When clearing we round the start up and the end down.
-                 * This should not matter as the shift should align with
-                 * the block size and no rounding should ever be needed.
-                 * However it is better to think a block is bad when it
-                 * isn't than to think a block is not bad when it is.
-                 */
-                s += (1<<bb->shift) - 1;
-                s >>= bb->shift;
-                target >>= bb->shift;
-                sectors = target - s;
-        }
-
-        write_seqlock_irq(&bb->lock);
-
-        p = bb->page;
-        lo = 0;
-        hi = bb->count;
-        /* Find the last range that starts before 'target' */
-        while (hi - lo > 1) {
-                int mid = (lo + hi) / 2;
-                sector_t a = BB_OFFSET(p[mid]);
-                if (a < target)
-                        lo = mid;
-                else
-                        hi = mid;
-        }
-        if (hi > lo) {
-                /* p[lo] is the last range that could overlap the
-                 * current range.  Earlier ranges could also overlap,
-                 * but only this one can overlap the end of the range.
-                 */
-                if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
-                        /* Partial overlap, leave the tail of this range */
-                        int ack = BB_ACK(p[lo]);
-                        sector_t a = BB_OFFSET(p[lo]);
-                        sector_t end = a + BB_LEN(p[lo]);
-
-                        if (a < s) {
-                                /* we need to split this range */
-                                if (bb->count >= MD_MAX_BADBLOCKS) {
-                                        rv = -ENOSPC;
-                                        goto out;
-                                }
-                                memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
-                                bb->count++;
-                                p[lo] = BB_MAKE(a, s-a, ack);
-                                lo++;
-                        }
-                        p[lo] = BB_MAKE(target, end - target, ack);
-                        /* there is no longer an overlap */
-                        hi = lo;
-                        lo--;
-                }
-                while (lo >= 0 &&
-                       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
-                        /* This range does overlap */
-                        if (BB_OFFSET(p[lo]) < s) {
-                                /* Keep the early parts of this range. */
-                                int ack = BB_ACK(p[lo]);
-                                sector_t start = BB_OFFSET(p[lo]);
-                                p[lo] = BB_MAKE(start, s - start, ack);
-                                /* now 'lo' doesn't overlap, so.. */
-                                break;
-                        }
-                        lo--;
-                }
-                /* 'lo' is strictly before, 'hi' is strictly after,
-                 * anything between needs to be discarded
-                 */
-                if (hi - lo > 1) {
-                        memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
-                        bb->count -= (hi - lo - 1);
-                }
-        }
-
-        bb->changed = 1;
-out:
-        write_sequnlock_irq(&bb->lock);
-        return rv;
-}
-
 int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                          int is_new)
 {
@@ -8849,133 +8505,11 @@ int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                 s += rdev->new_data_offset;
         else
                 s += rdev->data_offset;
-        return md_clear_badblocks(&rdev->badblocks,
-                                  s, sectors);
+        return badblocks_clear(&rdev->badblocks,
+                               s, sectors);
 }
 EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
 
-/*
- * Acknowledge all bad blocks in a list.
- * This only succeeds if ->changed is clear.  It is used by
- * in-kernel metadata updates
- */
-void md_ack_all_badblocks(struct badblocks *bb)
-{
-        if (bb->page == NULL || bb->changed)
-                /* no point even trying */
-                return;
-        write_seqlock_irq(&bb->lock);
-
-        if (bb->changed == 0 && bb->unacked_exist) {
-                u64 *p = bb->page;
-                int i;
-                for (i = 0; i < bb->count ; i++) {
-                        if (!BB_ACK(p[i])) {
-                                sector_t start = BB_OFFSET(p[i]);
-                                int len = BB_LEN(p[i]);
-                                p[i] = BB_MAKE(start, len, 1);
-                        }
-                }
-                bb->unacked_exist = 0;
-        }
-        write_sequnlock_irq(&bb->lock);
-}
-EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
-
-/* sysfs access to bad-blocks list.
- * We present two files.
- * 'bad-blocks' lists sector numbers and lengths of ranges that
- *  are recorded as bad.  The list is truncated to fit within
- *  the one-page limit of sysfs.
- *  Writing "sector length" to this file adds an acknowledged
- *  bad block list.
- * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
- *  been acknowledged.  Writing to this file adds bad blocks
- *  without acknowledging them.  This is largely for testing.
- */
-
-static ssize_t
-badblocks_show(struct badblocks *bb, char *page, int unack)
-{
-        size_t len;
-        int i;
-        u64 *p = bb->page;
-        unsigned seq;
-
-        if (bb->shift < 0)
-                return 0;
-
-retry:
-        seq = read_seqbegin(&bb->lock);
-
-        len = 0;
-        i = 0;
-
-        while (len < PAGE_SIZE && i < bb->count) {
-                sector_t s = BB_OFFSET(p[i]);
-                unsigned int length = BB_LEN(p[i]);
-                int ack = BB_ACK(p[i]);
-                i++;
-
-                if (unack && ack)
-                        continue;
-
-                len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
-                                (unsigned long long)s << bb->shift,
-                                length << bb->shift);
-        }
-        if (unack && len == 0)
-                bb->unacked_exist = 0;
-
-        if (read_seqretry(&bb->lock, seq))
-                goto retry;
-
-        return len;
-}
-
-#define DO_DEBUG 1
-
-static ssize_t
-badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
-{
-        unsigned long long sector;
-        int length;
-        char newline;
-#ifdef DO_DEBUG
-        /* Allow clearing via sysfs *only* for testing/debugging.
-         * Normally only a successful write may clear a badblock
-         */
-        int clear = 0;
-        if (page[0] == '-') {
-                clear = 1;
-                page++;
-        }
-#endif /* DO_DEBUG */
-
-        switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
-        case 3:
-                if (newline != '\n')
-                        return -EINVAL;
-        case 2:
-                if (length <= 0)
-                        return -EINVAL;
-                break;
-        default:
-                return -EINVAL;
-        }
-
-#ifdef DO_DEBUG
-        if (clear) {
-                md_clear_badblocks(bb, sector, length);
-                return len;
-        }
-#endif /* DO_DEBUG */
-        if (md_set_badblocks(bb, sector, length, !unack))
-                return len;
-        else
-                return -ENOSPC;
-}
-
 static int md_notify_reboot(struct notifier_block *this,
                             unsigned long code, void *x)
 {
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2bea51edfab7..389afc420db6 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -17,6 +17,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/badblocks.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mm.h>
@@ -28,13 +29,6 @@
 
 #define MaxSector (~(sector_t)0)
 
-/* Bad block numbers are stored sorted in a single page.
- * 64bits is used for each block or extent.
- * 54 bits are sector number, 9 bits are extent size,
- * 1 bit is an 'acknowledged' flag.
- */
-#define MD_MAX_BADBLOCKS	(PAGE_SIZE/8)
-
 /*
  * MD's 'extended' device
  */
@@ -117,22 +111,7 @@ struct md_rdev {
         struct kernfs_node *sysfs_state;	/* handle for 'state'
                                                  * sysfs entry */
 
-        struct badblocks {
-                int count;              /* count of bad blocks */
-                int unacked_exist;      /* there probably are unacknowledged
-                                         * bad blocks.  This is only cleared
-                                         * when a read discovers none
-                                         */
-                int shift;              /* shift from sectors to block size
-                                         * a -ve shift means badblocks are
-                                         * disabled.*/
-                u64 *page;              /* badblock list */
-                int changed;
-                seqlock_t lock;
-
-                sector_t sector;
-                sector_t size;          /* in sectors */
-        } badblocks;
+        struct badblocks badblocks;
 };
 enum flag_bits {
         Faulty,                 /* device is known to have a fault */
@@ -185,22 +164,11 @@ enum flag_bits {
          */
 };
 
-#define BB_LEN_MASK	(0x00000000000001FFULL)
-#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
-#define BB_ACK_MASK	(0x8000000000000000ULL)
-#define BB_MAX_LEN	512
-#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
-#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
-#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
-#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
-
-extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
-                          sector_t *first_bad, int *bad_sectors);
 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
                               sector_t *first_bad, int *bad_sectors)
 {
         if (unlikely(rdev->badblocks.count)) {
-                int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
-                                        sectors,
-                                        first_bad, bad_sectors);
+                int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
+                                         sectors,
+                                         first_bad, bad_sectors);
                 if (rv)
@@ -213,8 +181,6 @@ extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                               int is_new);
 extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
                                 int is_new);
-extern void md_ack_all_badblocks(struct badblocks *bb);
-
 struct md_cluster_info;
 
 struct mddev {
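
A footnote on the entry encoding that leaves md.h above: each table entry is a
single u64 holding a 54-bit start sector, a 9-bit (length - 1) field, and the
acknowledged flag in the most significant bit, which is what caps a single
range at BB_MAX_LEN = 512 sectors. A small standalone sketch (ordinary
userspace C, not kernel code) that reuses the macro definitions exactly as
deleted above and unpacks one entry:

#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* Entry layout, per the macros deleted from md.h above:
 * bit 63 'acknowledged', bits 62..9 start sector, bits 8..0 length-1. */
#define BB_LEN_MASK	(0x00000000000001FFULL)
#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
#define BB_ACK_MASK	(0x8000000000000000ULL)
#define BB_MAX_LEN	512
#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))

int main(void)
{
        /* An acknowledged 8-sector bad range starting at sector 4096. */
        u64 e = BB_MAKE(4096ULL, 8, 1);

        /* Prints: offset=4096 len=8 ack=1 entry=0x8000000000200007 */
        printf("offset=%llu len=%d ack=%d entry=0x%llx\n",
               (unsigned long long)BB_OFFSET(e), (int)BB_LEN(e), BB_ACK(e),
               (unsigned long long)e);
        return 0;
}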