author	Vishal Verma <vishal.l.verma@intel.com>	2015-12-24 21:20:34 -0500
committer	Dan Williams <dan.j.williams@intel.com>	2016-01-09 11:39:03 -0500
commit	fc974ee2bffdde47d1e4b220cf326952cc2c4794
tree	8d6e2d471fbd49ef1b1d7e2a31a0785dd0be9afc /drivers/md
parent	99e6608c9e7414ae4f2168df8bf8fae3eb49e41f
md: convert to use the generic badblocks code
Retain badblocks as part of rdev, but use the accessor functions from
include/linux/badblocks for all manipulation.
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
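
The conversion is largely mechanical: each md-local helper maps to a generic counterpart (md_is_badblock -> badblocks_check, md_set_badblocks -> badblocks_set, md_clear_badblocks -> badblocks_clear, md_ack_all_badblocks -> ack_all_badblocks), with one subtlety — badblocks_set() returns 0 on success where md_set_badblocks() returned 1, so the tests at the call sites are inverted. A minimal sketch of the generic API as exercised by this patch; the demo function is illustrative, not part of the commit:

	#include <linux/badblocks.h>
	#include <linux/errno.h>

	/* Illustrative sketch only -- not part of the commit; signatures
	 * follow the call sites in the diff below.
	 */
	static int demo_generic_badblocks(struct badblocks *bb)
	{
		sector_t first_bad;
		int bad_sectors;
		int err, rv;

		err = badblocks_init(bb, 0);	/* replaces md_rdev_init()'s open-coded setup */
		if (err)
			return err;

		/* replaces md_set_badblocks(); returns 0 on success,
		 * unlike the old helper which returned 1 */
		if (badblocks_set(bb, 1024, 8, 1))	/* 8 bad sectors at 1024, acked */
			return -EINVAL;

		/* replaces md_is_badblock(): 0 = clean, 1 = all acked, -1 = unacked */
		rv = badblocks_check(bb, 1000, 100, &first_bad, &bad_sectors);

		ack_all_badblocks(bb);		/* replaces md_ack_all_badblocks() */
		badblocks_clear(bb, 1024, 8);	/* replaces md_clear_badblocks() */
		badblocks_free(bb);		/* replaces kfree(badblocks.page) */
		return rv;
	}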
Diffstat (limited to 'drivers/md')
-rw-r--r--	drivers/md/md.c	| 516
-rw-r--r--	drivers/md/md.h	|  40
2 files changed, 28 insertions, 528 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 807095f4c793..1e48aa9de352 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -34,6 +34,7 @@
 
 #include <linux/kthread.h>
 #include <linux/blkdev.h>
+#include <linux/badblocks.h>
 #include <linux/sysctl.h>
 #include <linux/seq_file.h>
 #include <linux/fs.h>
@@ -709,8 +710,7 @@ void md_rdev_clear(struct md_rdev *rdev)
 		put_page(rdev->bb_page);
 		rdev->bb_page = NULL;
 	}
-	kfree(rdev->badblocks.page);
-	rdev->badblocks.page = NULL;
+	badblocks_free(&rdev->badblocks);
 }
 EXPORT_SYMBOL_GPL(md_rdev_clear);
 
@@ -1360,8 +1360,6 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 *sb)
 	return cpu_to_le32(csum);
 }
 
-static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
-			    int acknowledged);
 static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
 {
 	struct mdp_superblock_1 *sb;
@@ -1486,8 +1484,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 			count <<= sb->bblog_shift;
 			if (bb + 1 == 0)
 				break;
-			if (md_set_badblocks(&rdev->badblocks,
-					     sector, count, 1) == 0)
+			if (badblocks_set(&rdev->badblocks, sector, count, 1))
 				return -EINVAL;
 		}
 	} else if (sb->bblog_offset != 0)
@@ -2319,7 +2316,7 @@ repeat:
 		rdev_for_each(rdev, mddev) {
 			if (rdev->badblocks.changed) {
 				rdev->badblocks.changed = 0;
-				md_ack_all_badblocks(&rdev->badblocks);
+				ack_all_badblocks(&rdev->badblocks);
 				md_error(mddev, rdev);
 			}
 			clear_bit(Blocked, &rdev->flags);
@@ -2445,7 +2442,7 @@ repeat:
 		clear_bit(Blocked, &rdev->flags);
 
 		if (any_badblocks_changed)
-			md_ack_all_badblocks(&rdev->badblocks);
+			ack_all_badblocks(&rdev->badblocks);
 		clear_bit(BlockedBadBlocks, &rdev->flags);
 		wake_up(&rdev->blocked_wait);
 	}
@@ -3046,11 +3043,17 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
 static struct rdev_sysfs_entry rdev_recovery_start =
 __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
 
-static ssize_t
-badblocks_show(struct badblocks *bb, char *page, int unack);
-static ssize_t
-badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
-
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad-blocks' lists sector numbers and lengths of ranges that
+ *    are recorded as bad.  The list is truncated to fit within
+ *    the one-page limit of sysfs.
+ *    Writing "sector length" to this file adds an acknowledged
+ *    bad block list.
+ * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
+ *    been acknowledged.  Writing to this file adds bad blocks
+ *    without acknowledging them.  This is largely for testing.
+ */
 static ssize_t bb_show(struct md_rdev *rdev, char *page)
 {
 	return badblocks_show(&rdev->badblocks, page, 0);
@@ -3165,14 +3168,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	 * This reserves the space even on arrays where it cannot
 	 * be used - I wonder if that matters
 	 */
-	rdev->badblocks.count = 0;
-	rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
-	rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
-	seqlock_init(&rdev->badblocks.lock);
-	if (rdev->badblocks.page == NULL)
-		return -ENOMEM;
-
-	return 0;
+	return badblocks_init(&rdev->badblocks, 0);
 }
 EXPORT_SYMBOL_GPL(md_rdev_init);
 
@@ -8478,254 +8474,9 @@ void md_finish_reshape(struct mddev *mddev)
 }
 EXPORT_SYMBOL(md_finish_reshape);
 
-/* Bad block management.
- * We can record which blocks on each device are 'bad' and so just
- * fail those blocks, or that stripe, rather than the whole device.
- * Entries in the bad-block table are 64bits wide.  This comprises:
- * Length of bad-range, in sectors: 0-511 for lengths 1-512
- * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
- *  A 'shift' can be set so that larger blocks are tracked and
- *  consequently larger devices can be covered.
- * 'Acknowledged' flag - 1 bit. - the most significant bit.
- *
- * Locking of the bad-block table uses a seqlock so md_is_badblock
- * might need to retry if it is very unlucky.
- * We will sometimes want to check for bad blocks in a bi_end_io function,
- * so we use the write_seqlock_irq variant.
- *
- * When looking for a bad block we specify a range and want to
- * know if any block in the range is bad.  So we binary-search
- * to the last range that starts at-or-before the given endpoint,
- * (or "before the sector after the target range")
- * then see if it ends after the given start.
- * We return
- *  0 if there are no known bad blocks in the range
- *  1 if there are known bad block which are all acknowledged
- * -1 if there are bad blocks which have not yet been acknowledged in metadata.
- * plus the start/length of the first bad section we overlap.
- */
-int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
-		   sector_t *first_bad, int *bad_sectors)
-{
-	int hi;
-	int lo;
-	u64 *p = bb->page;
-	int rv;
-	sector_t target = s + sectors;
-	unsigned seq;
-
-	if (bb->shift > 0) {
-		/* round the start down, and the end up */
-		s >>= bb->shift;
-		target += (1<<bb->shift) - 1;
-		target >>= bb->shift;
-		sectors = target - s;
-	}
-	/* 'target' is now the first block after the bad range */
-
-retry:
-	seq = read_seqbegin(&bb->lock);
-	lo = 0;
-	rv = 0;
-	hi = bb->count;
-
-	/* Binary search between lo and hi for 'target'
-	 * i.e. for the last range that starts before 'target'
-	 */
-	/* INVARIANT: ranges before 'lo' and at-or-after 'hi'
-	 * are known not to be the last range before target.
-	 * VARIANT: hi-lo is the number of possible
-	 * ranges, and decreases until it reaches 1
-	 */
-	while (hi - lo > 1) {
-		int mid = (lo + hi) / 2;
-		sector_t a = BB_OFFSET(p[mid]);
-		if (a < target)
-			/* This could still be the one, earlier ranges
-			 * could not. */
-			lo = mid;
-		else
-			/* This and later ranges are definitely out. */
-			hi = mid;
-	}
-	/* 'lo' might be the last that started before target, but 'hi' isn't */
-	if (hi > lo) {
-		/* need to check all range that end after 's' to see if
-		 * any are unacknowledged.
-		 */
-		while (lo >= 0 &&
-		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
-			if (BB_OFFSET(p[lo]) < target) {
-				/* starts before the end, and finishes after
-				 * the start, so they must overlap
-				 */
-				if (rv != -1 && BB_ACK(p[lo]))
-					rv = 1;
-				else
-					rv = -1;
-				*first_bad = BB_OFFSET(p[lo]);
-				*bad_sectors = BB_LEN(p[lo]);
-			}
-			lo--;
-		}
-	}
-
-	if (read_seqretry(&bb->lock, seq))
-		goto retry;
-
-	return rv;
-}
-EXPORT_SYMBOL_GPL(md_is_badblock);
-
-/*
- * Add a range of bad blocks to the table.
- * This might extend the table, or might contract it
- * if two adjacent ranges can be merged.
- * We binary-search to find the 'insertion' point, then
- * decide how best to handle it.
- */
-static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
-			    int acknowledged)
-{
-	u64 *p;
-	int lo, hi;
-	int rv = 1;
-	unsigned long flags;
-
-	if (bb->shift < 0)
-		/* badblocks are disabled */
-		return 0;
-
-	if (bb->shift) {
-		/* round the start down, and the end up */
-		sector_t next = s + sectors;
-		s >>= bb->shift;
-		next += (1<<bb->shift) - 1;
-		next >>= bb->shift;
-		sectors = next - s;
-	}
-
-	write_seqlock_irqsave(&bb->lock, flags);
-
-	p = bb->page;
-	lo = 0;
-	hi = bb->count;
-	/* Find the last range that starts at-or-before 's' */
-	while (hi - lo > 1) {
-		int mid = (lo + hi) / 2;
-		sector_t a = BB_OFFSET(p[mid]);
-		if (a <= s)
-			lo = mid;
-		else
-			hi = mid;
-	}
-	if (hi > lo && BB_OFFSET(p[lo]) > s)
-		hi = lo;
-
-	if (hi > lo) {
-		/* we found a range that might merge with the start
-		 * of our new range
-		 */
-		sector_t a = BB_OFFSET(p[lo]);
-		sector_t e = a + BB_LEN(p[lo]);
-		int ack = BB_ACK(p[lo]);
-		if (e >= s) {
-			/* Yes, we can merge with a previous range */
-			if (s == a && s + sectors >= e)
-				/* new range covers old */
-				ack = acknowledged;
-			else
-				ack = ack && acknowledged;
-
-			if (e < s + sectors)
-				e = s + sectors;
-			if (e - a <= BB_MAX_LEN) {
-				p[lo] = BB_MAKE(a, e-a, ack);
-				s = e;
-			} else {
-				/* does not all fit in one range,
-				 * make p[lo] maximal
-				 */
-				if (BB_LEN(p[lo]) != BB_MAX_LEN)
-					p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
-				s = a + BB_MAX_LEN;
-			}
-			sectors = e - s;
-		}
-	}
-	if (sectors && hi < bb->count) {
-		/* 'hi' points to the first range that starts after 's'.
-		 * Maybe we can merge with the start of that range */
-		sector_t a = BB_OFFSET(p[hi]);
-		sector_t e = a + BB_LEN(p[hi]);
-		int ack = BB_ACK(p[hi]);
-		if (a <= s + sectors) {
-			/* merging is possible */
-			if (e <= s + sectors) {
-				/* full overlap */
-				e = s + sectors;
-				ack = acknowledged;
-			} else
-				ack = ack && acknowledged;
-
-			a = s;
-			if (e - a <= BB_MAX_LEN) {
-				p[hi] = BB_MAKE(a, e-a, ack);
-				s = e;
-			} else {
-				p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
-				s = a + BB_MAX_LEN;
-			}
-			sectors = e - s;
-			lo = hi;
-			hi++;
-		}
-	}
-	if (sectors == 0 && hi < bb->count) {
-		/* we might be able to combine lo and hi */
-		/* Note: 's' is at the end of 'lo' */
-		sector_t a = BB_OFFSET(p[hi]);
-		int lolen = BB_LEN(p[lo]);
-		int hilen = BB_LEN(p[hi]);
-		int newlen = lolen + hilen - (s - a);
-		if (s >= a && newlen < BB_MAX_LEN) {
-			/* yes, we can combine them */
-			int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
-			p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
-			memmove(p + hi, p + hi + 1,
-				(bb->count - hi - 1) * 8);
-			bb->count--;
-		}
-	}
-	while (sectors) {
-		/* didn't merge (it all).
-		 * Need to add a range just before 'hi' */
-		if (bb->count >= MD_MAX_BADBLOCKS) {
-			/* No room for more */
-			rv = 0;
-			break;
-		} else {
-			int this_sectors = sectors;
-			memmove(p + hi + 1, p + hi,
-				(bb->count - hi) * 8);
-			bb->count++;
-
-			if (this_sectors > BB_MAX_LEN)
-				this_sectors = BB_MAX_LEN;
-			p[hi] = BB_MAKE(s, this_sectors, acknowledged);
-			sectors -= this_sectors;
-			s += this_sectors;
-		}
-	}
-
-	bb->changed = 1;
-	if (!acknowledged)
-		bb->unacked_exist = 1;
-	write_sequnlock_irqrestore(&bb->lock, flags);
-
-	return rv;
-}
+/* Bad block management */
 
+/* Returns 1 on success, 0 on failure */
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		       int is_new)
 {
@@ -8734,114 +8485,19 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		s += rdev->new_data_offset;
 	else
 		s += rdev->data_offset;
-	rv = md_set_badblocks(&rdev->badblocks,
-			      s, sectors, 0);
-	if (rv) {
+	rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
+	if (rv == 0) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
 		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
 		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
 		md_wakeup_thread(rdev->mddev->thread);
-	}
-	return rv;
+		return 1;
+	} else
+		return 0;
 }
 EXPORT_SYMBOL_GPL(rdev_set_badblocks);
 
-/*
- * Remove a range of bad blocks from the table.
- * This may involve extending the table if we spilt a region,
- * but it must not fail.  So if the table becomes full, we just
- * drop the remove request.
- */
-static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
-{
-	u64 *p;
-	int lo, hi;
-	sector_t target = s + sectors;
-	int rv = 0;
-
-	if (bb->shift > 0) {
-		/* When clearing we round the start up and the end down.
-		 * This should not matter as the shift should align with
-		 * the block size and no rounding should ever be needed.
-		 * However it is better the think a block is bad when it
-		 * isn't than to think a block is not bad when it is.
-		 */
-		s += (1<<bb->shift) - 1;
-		s >>= bb->shift;
-		target >>= bb->shift;
-		sectors = target - s;
-	}
-
-	write_seqlock_irq(&bb->lock);
-
-	p = bb->page;
-	lo = 0;
-	hi = bb->count;
-	/* Find the last range that starts before 'target' */
-	while (hi - lo > 1) {
-		int mid = (lo + hi) / 2;
-		sector_t a = BB_OFFSET(p[mid]);
-		if (a < target)
-			lo = mid;
-		else
-			hi = mid;
-	}
-	if (hi > lo) {
-		/* p[lo] is the last range that could overlap the
-		 * current range.  Earlier ranges could also overlap,
-		 * but only this one can overlap the end of the range.
-		 */
-		if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
-			/* Partial overlap, leave the tail of this range */
-			int ack = BB_ACK(p[lo]);
-			sector_t a = BB_OFFSET(p[lo]);
-			sector_t end = a + BB_LEN(p[lo]);
-
-			if (a < s) {
-				/* we need to split this range */
-				if (bb->count >= MD_MAX_BADBLOCKS) {
-					rv = -ENOSPC;
-					goto out;
-				}
-				memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
-				bb->count++;
-				p[lo] = BB_MAKE(a, s-a, ack);
-				lo++;
-			}
-			p[lo] = BB_MAKE(target, end - target, ack);
-			/* there is no longer an overlap */
-			hi = lo;
-			lo--;
-		}
-		while (lo >= 0 &&
-		       BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
-			/* This range does overlap */
-			if (BB_OFFSET(p[lo]) < s) {
-				/* Keep the early parts of this range. */
-				int ack = BB_ACK(p[lo]);
-				sector_t start = BB_OFFSET(p[lo]);
-				p[lo] = BB_MAKE(start, s - start, ack);
-				/* now low doesn't overlap, so.. */
-				break;
-			}
-			lo--;
-		}
-		/* 'lo' is strictly before, 'hi' is strictly after,
-		 * anything between needs to be discarded
-		 */
-		if (hi - lo > 1) {
-			memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
-			bb->count -= (hi - lo - 1);
-		}
-	}
-
-	bb->changed = 1;
-out:
-	write_sequnlock_irq(&bb->lock);
-	return rv;
-}
-
 int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 			 int is_new)
 {
@@ -8849,133 +8505,11 @@ int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		s += rdev->new_data_offset;
 	else
 		s += rdev->data_offset;
-	return md_clear_badblocks(&rdev->badblocks,
-				  s, sectors);
+	return badblocks_clear(&rdev->badblocks,
+				  s, sectors);
 }
 EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
 
-/*
- * Acknowledge all bad blocks in a list.
- * This only succeeds if ->changed is clear.  It is used by
- * in-kernel metadata updates
- */
-void md_ack_all_badblocks(struct badblocks *bb)
-{
-	if (bb->page == NULL || bb->changed)
-		/* no point even trying */
-		return;
-	write_seqlock_irq(&bb->lock);
-
-	if (bb->changed == 0 && bb->unacked_exist) {
-		u64 *p = bb->page;
-		int i;
-		for (i = 0; i < bb->count ; i++) {
-			if (!BB_ACK(p[i])) {
-				sector_t start = BB_OFFSET(p[i]);
-				int len = BB_LEN(p[i]);
-				p[i] = BB_MAKE(start, len, 1);
-			}
-		}
-		bb->unacked_exist = 0;
-	}
-	write_sequnlock_irq(&bb->lock);
-}
-EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
-
-/* sysfs access to bad-blocks list.
- * We present two files.
- * 'bad-blocks' lists sector numbers and lengths of ranges that
- *    are recorded as bad.  The list is truncated to fit within
- *    the one-page limit of sysfs.
- *    Writing "sector length" to this file adds an acknowledged
- *    bad block list.
- * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
- *    been acknowledged.  Writing to this file adds bad blocks
- *    without acknowledging them.  This is largely for testing.
- */
-
-static ssize_t
-badblocks_show(struct badblocks *bb, char *page, int unack)
-{
-	size_t len;
-	int i;
-	u64 *p = bb->page;
-	unsigned seq;
-
-	if (bb->shift < 0)
-		return 0;
-
-retry:
-	seq = read_seqbegin(&bb->lock);
-
-	len = 0;
-	i = 0;
-
-	while (len < PAGE_SIZE && i < bb->count) {
-		sector_t s = BB_OFFSET(p[i]);
-		unsigned int length = BB_LEN(p[i]);
-		int ack = BB_ACK(p[i]);
-		i++;
-
-		if (unack && ack)
-			continue;
-
-		len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
-				(unsigned long long)s << bb->shift,
-				length << bb->shift);
-	}
-	if (unack && len == 0)
-		bb->unacked_exist = 0;
-
-	if (read_seqretry(&bb->lock, seq))
-		goto retry;
-
-	return len;
-}
-
-#define DO_DEBUG 1
-
-static ssize_t
-badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
-{
-	unsigned long long sector;
-	int length;
-	char newline;
-#ifdef DO_DEBUG
-	/* Allow clearing via sysfs *only* for testing/debugging.
-	 * Normally only a successful write may clear a badblock
-	 */
-	int clear = 0;
-	if (page[0] == '-') {
-		clear = 1;
-		page++;
-	}
-#endif /* DO_DEBUG */
-
-	switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
-	case 3:
-		if (newline != '\n')
-			return -EINVAL;
-	case 2:
-		if (length <= 0)
-			return -EINVAL;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-#ifdef DO_DEBUG
-	if (clear) {
-		md_clear_badblocks(bb, sector, length);
-		return len;
-	}
-#endif /* DO_DEBUG */
-	if (md_set_badblocks(bb, sector, length, !unack))
-		return len;
-	else
-		return -ENOSPC;
-}
-
 static int md_notify_reboot(struct notifier_block *this,
 			    unsigned long code, void *x)
 {
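
The entry format described in the removed "Bad block management" comment above (and in the BB_* macros leaving md.h below) can be checked by hand; a worked example with illustrative values:

	/* Illustrative: encode an acknowledged 16-sector bad range starting
	 * at sector 4096. Bit 63 = ack flag, bits 62..9 = start sector
	 * (54 bits, hence the 8 exbibyte limit), bits 8..0 = length - 1
	 * (so 1-512 sectors per entry, BB_MAX_LEN = 512).
	 */
	u64 e = ((u64)4096 << 9) | (16 - 1) | ((u64)1 << 63);	/* == BB_MAKE(4096, 16, 1) */

	sector_t start = (e & 0x7FFFFFFFFFFFFE00ULL) >> 9;	/* BB_OFFSET(e) == 4096 */
	int len = (e & 0x00000000000001FFULL) + 1;		/* BB_LEN(e)    == 16 */
	int ack = !!(e & 0x8000000000000000ULL);		/* BB_ACK(e)    == 1 */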
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 2bea51edfab7..389afc420db6 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -17,6 +17,7 @@
 
 #include <linux/blkdev.h>
 #include <linux/backing-dev.h>
+#include <linux/badblocks.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
 #include <linux/mm.h>
@@ -28,13 +29,6 @@
 
 #define MaxSector (~(sector_t)0)
 
-/* Bad block numbers are stored sorted in a single page.
- * 64bits is used for each block or extent.
- * 54 bits are sector number, 9 bits are extent size,
- * 1 bit is an 'acknowledged' flag.
- */
-#define MD_MAX_BADBLOCKS	(PAGE_SIZE/8)
-
 /*
  * MD's 'extended' device
  */
@@ -117,22 +111,7 @@ struct md_rdev {
 	struct kernfs_node *sysfs_state; /* handle for 'state'
 					   * sysfs entry */
 
-	struct badblocks {
-		int	count;		/* count of bad blocks */
-		int	unacked_exist;	/* there probably are unacknowledged
-					 * bad blocks.  This is only cleared
-					 * when a read discovers none
-					 */
-		int	shift;		/* shift from sectors to block size
-					 * a -ve shift means badblocks are
-					 * disabled.*/
-		u64	*page;		/* badblock list */
-		int	changed;
-		seqlock_t lock;
-
-		sector_t sector;
-		sector_t size;		/* in sectors */
-	} badblocks;
+	struct badblocks badblocks;
 };
 enum flag_bits {
 	Faulty,			/* device is known to have a fault */
@@ -185,22 +164,11 @@
 	 */
 };
 
-#define BB_LEN_MASK	(0x00000000000001FFULL)
-#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
-#define BB_ACK_MASK	(0x8000000000000000ULL)
-#define BB_MAX_LEN	512
-#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
-#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
-#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
-#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
-
-extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
-			  sector_t *first_bad, int *bad_sectors);
 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
 			      sector_t *first_bad, int *bad_sectors)
 {
 	if (unlikely(rdev->badblocks.count)) {
-		int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s,
+		int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
 					sectors,
 					first_bad, bad_sectors);
 		if (rv)
@@ -213,8 +181,6 @@ extern int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 				int is_new);
 extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 				int is_new);
-extern void md_ack_all_badblocks(struct badblocks *bb);
-
 struct md_cluster_info;
 
 struct mddev {
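
Because the is_badblock() wrapper above keeps its signature, callers in the raid personalities need no changes from this conversion. A hedged sketch of the usual calling pattern; the helper name is illustrative, not from this diff:

	/* Illustrative only -- modeled on how raid personalities consult
	 * the table before issuing a read.
	 */
	static bool demo_range_is_clean(struct md_rdev *rdev, sector_t s, int sectors)
	{
		sector_t first_bad;
		int bad_sectors;

		/* a non-zero return means the range overlaps a known bad
		 * range; first_bad/bad_sectors then describe the overlap */
		return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors) == 0;
	}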