diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-21 13:29:12 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-21 13:29:12 -0400 |
commit | 8a392625b665c676a77c62f8608d10ff430bcb83 (patch) | |
tree | 4000a65d61baed73200e47f91dea5263ed16edd0 /drivers/md/raid5.c | |
parent | 519f0141f1c42e2b8b59c7dea005cbf6095358e8 (diff) | |
parent | 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (52 commits)
md: Protect access to mddev->disks list using RCU
md: only count actual openers as access which prevent a 'stop'
md: linear: Make array_size sector-based and rename it to array_sectors.
md: Make mddev->array_size sector-based.
md: Make super_type->rdev_size_change() take sector-based sizes.
md: Fix check for overlapping devices.
md: Tidy up rdev_size_store a bit:
md: Remove some unused macros.
md: Turn rdev->sb_offset into a sector-based quantity.
md: Make calc_dev_sboffset() return a sector count.
md: Replace calc_dev_size() by calc_num_sectors().
md: Make update_size() take the number of sectors.
md: Better control of when do_md_stop is allowed to stop the array.
md: get_disk_info(): Don't convert between signed and unsigned and back.
md: Simplify restart_array().
md: alloc_disk_sb(): Return proper error value.
md: Simplify sb_equal().
md: Simplify uuid_equal().
md: sb_equal(): Fix misleading printk.
md: Fix a typo in the comment to cmd_match().
...
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 745 |
1 files changed, 255 insertions, 490 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9ce7154845c6..55e7c56045a0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -115,15 +115,20 @@ static void return_io(struct bio *return_bi) | |||
115 | return_bi = bi->bi_next; | 115 | return_bi = bi->bi_next; |
116 | bi->bi_next = NULL; | 116 | bi->bi_next = NULL; |
117 | bi->bi_size = 0; | 117 | bi->bi_size = 0; |
118 | bi->bi_end_io(bi, | 118 | bio_endio(bi, 0); |
119 | test_bit(BIO_UPTODATE, &bi->bi_flags) | ||
120 | ? 0 : -EIO); | ||
121 | bi = return_bi; | 119 | bi = return_bi; |
122 | } | 120 | } |
123 | } | 121 | } |
124 | 122 | ||
125 | static void print_raid5_conf (raid5_conf_t *conf); | 123 | static void print_raid5_conf (raid5_conf_t *conf); |
126 | 124 | ||
125 | static int stripe_operations_active(struct stripe_head *sh) | ||
126 | { | ||
127 | return sh->check_state || sh->reconstruct_state || | ||
128 | test_bit(STRIPE_BIOFILL_RUN, &sh->state) || | ||
129 | test_bit(STRIPE_COMPUTE_RUN, &sh->state); | ||
130 | } | ||
131 | |||
127 | static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | 132 | static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) |
128 | { | 133 | { |
129 | if (atomic_dec_and_test(&sh->count)) { | 134 | if (atomic_dec_and_test(&sh->count)) { |
@@ -143,7 +148,7 @@ static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) | |||
143 | } | 148 | } |
144 | md_wakeup_thread(conf->mddev->thread); | 149 | md_wakeup_thread(conf->mddev->thread); |
145 | } else { | 150 | } else { |
146 | BUG_ON(sh->ops.pending); | 151 | BUG_ON(stripe_operations_active(sh)); |
147 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { | 152 | if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { |
148 | atomic_dec(&conf->preread_active_stripes); | 153 | atomic_dec(&conf->preread_active_stripes); |
149 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) | 154 | if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) |
@@ -245,7 +250,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int | |||
245 | 250 | ||
246 | BUG_ON(atomic_read(&sh->count) != 0); | 251 | BUG_ON(atomic_read(&sh->count) != 0); |
247 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); | 252 | BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); |
248 | BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete); | 253 | BUG_ON(stripe_operations_active(sh)); |
249 | 254 | ||
250 | CHECK_DEVLOCK(); | 255 | CHECK_DEVLOCK(); |
251 | pr_debug("init_stripe called, stripe %llu\n", | 256 | pr_debug("init_stripe called, stripe %llu\n", |
@@ -346,62 +351,18 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector | |||
346 | return sh; | 351 | return sh; |
347 | } | 352 | } |
348 | 353 | ||
349 | /* test_and_ack_op() ensures that we only dequeue an operation once */ | ||
350 | #define test_and_ack_op(op, pend) \ | ||
351 | do { \ | ||
352 | if (test_bit(op, &sh->ops.pending) && \ | ||
353 | !test_bit(op, &sh->ops.complete)) { \ | ||
354 | if (test_and_set_bit(op, &sh->ops.ack)) \ | ||
355 | clear_bit(op, &pend); \ | ||
356 | else \ | ||
357 | ack++; \ | ||
358 | } else \ | ||
359 | clear_bit(op, &pend); \ | ||
360 | } while (0) | ||
361 | |||
362 | /* find new work to run, do not resubmit work that is already | ||
363 | * in flight | ||
364 | */ | ||
365 | static unsigned long get_stripe_work(struct stripe_head *sh) | ||
366 | { | ||
367 | unsigned long pending; | ||
368 | int ack = 0; | ||
369 | |||
370 | pending = sh->ops.pending; | ||
371 | |||
372 | test_and_ack_op(STRIPE_OP_BIOFILL, pending); | ||
373 | test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending); | ||
374 | test_and_ack_op(STRIPE_OP_PREXOR, pending); | ||
375 | test_and_ack_op(STRIPE_OP_BIODRAIN, pending); | ||
376 | test_and_ack_op(STRIPE_OP_POSTXOR, pending); | ||
377 | test_and_ack_op(STRIPE_OP_CHECK, pending); | ||
378 | if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
379 | ack++; | ||
380 | |||
381 | sh->ops.count -= ack; | ||
382 | if (unlikely(sh->ops.count < 0)) { | ||
383 | printk(KERN_ERR "pending: %#lx ops.pending: %#lx ops.ack: %#lx " | ||
384 | "ops.complete: %#lx\n", pending, sh->ops.pending, | ||
385 | sh->ops.ack, sh->ops.complete); | ||
386 | BUG(); | ||
387 | } | ||
388 | |||
389 | return pending; | ||
390 | } | ||
391 | |||
392 | static void | 354 | static void |
393 | raid5_end_read_request(struct bio *bi, int error); | 355 | raid5_end_read_request(struct bio *bi, int error); |
394 | static void | 356 | static void |
395 | raid5_end_write_request(struct bio *bi, int error); | 357 | raid5_end_write_request(struct bio *bi, int error); |
396 | 358 | ||
397 | static void ops_run_io(struct stripe_head *sh) | 359 | static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) |
398 | { | 360 | { |
399 | raid5_conf_t *conf = sh->raid_conf; | 361 | raid5_conf_t *conf = sh->raid_conf; |
400 | int i, disks = sh->disks; | 362 | int i, disks = sh->disks; |
401 | 363 | ||
402 | might_sleep(); | 364 | might_sleep(); |
403 | 365 | ||
404 | set_bit(STRIPE_IO_STARTED, &sh->state); | ||
405 | for (i = disks; i--; ) { | 366 | for (i = disks; i--; ) { |
406 | int rw; | 367 | int rw; |
407 | struct bio *bi; | 368 | struct bio *bi; |
@@ -430,11 +391,11 @@ static void ops_run_io(struct stripe_head *sh) | |||
430 | rcu_read_unlock(); | 391 | rcu_read_unlock(); |
431 | 392 | ||
432 | if (rdev) { | 393 | if (rdev) { |
433 | if (test_bit(STRIPE_SYNCING, &sh->state) || | 394 | if (s->syncing || s->expanding || s->expanded) |
434 | test_bit(STRIPE_EXPAND_SOURCE, &sh->state) || | ||
435 | test_bit(STRIPE_EXPAND_READY, &sh->state)) | ||
436 | md_sync_acct(rdev->bdev, STRIPE_SECTORS); | 395 | md_sync_acct(rdev->bdev, STRIPE_SECTORS); |
437 | 396 | ||
397 | set_bit(STRIPE_IO_STARTED, &sh->state); | ||
398 | |||
438 | bi->bi_bdev = rdev->bdev; | 399 | bi->bi_bdev = rdev->bdev; |
439 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", | 400 | pr_debug("%s: for %llu schedule op %ld on disc %d\n", |
440 | __func__, (unsigned long long)sh->sector, | 401 | __func__, (unsigned long long)sh->sector, |
@@ -528,38 +489,34 @@ static void ops_complete_biofill(void *stripe_head_ref) | |||
528 | (unsigned long long)sh->sector); | 489 | (unsigned long long)sh->sector); |
529 | 490 | ||
530 | /* clear completed biofills */ | 491 | /* clear completed biofills */ |
492 | spin_lock_irq(&conf->device_lock); | ||
531 | for (i = sh->disks; i--; ) { | 493 | for (i = sh->disks; i--; ) { |
532 | struct r5dev *dev = &sh->dev[i]; | 494 | struct r5dev *dev = &sh->dev[i]; |
533 | 495 | ||
534 | /* acknowledge completion of a biofill operation */ | 496 | /* acknowledge completion of a biofill operation */ |
535 | /* and check if we need to reply to a read request, | 497 | /* and check if we need to reply to a read request, |
536 | * new R5_Wantfill requests are held off until | 498 | * new R5_Wantfill requests are held off until |
537 | * !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending) | 499 | * !STRIPE_BIOFILL_RUN |
538 | */ | 500 | */ |
539 | if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { | 501 | if (test_and_clear_bit(R5_Wantfill, &dev->flags)) { |
540 | struct bio *rbi, *rbi2; | 502 | struct bio *rbi, *rbi2; |
541 | 503 | ||
542 | /* The access to dev->read is outside of the | ||
543 | * spin_lock_irq(&conf->device_lock), but is protected | ||
544 | * by the STRIPE_OP_BIOFILL pending bit | ||
545 | */ | ||
546 | BUG_ON(!dev->read); | 504 | BUG_ON(!dev->read); |
547 | rbi = dev->read; | 505 | rbi = dev->read; |
548 | dev->read = NULL; | 506 | dev->read = NULL; |
549 | while (rbi && rbi->bi_sector < | 507 | while (rbi && rbi->bi_sector < |
550 | dev->sector + STRIPE_SECTORS) { | 508 | dev->sector + STRIPE_SECTORS) { |
551 | rbi2 = r5_next_bio(rbi, dev->sector); | 509 | rbi2 = r5_next_bio(rbi, dev->sector); |
552 | spin_lock_irq(&conf->device_lock); | ||
553 | if (--rbi->bi_phys_segments == 0) { | 510 | if (--rbi->bi_phys_segments == 0) { |
554 | rbi->bi_next = return_bi; | 511 | rbi->bi_next = return_bi; |
555 | return_bi = rbi; | 512 | return_bi = rbi; |
556 | } | 513 | } |
557 | spin_unlock_irq(&conf->device_lock); | ||
558 | rbi = rbi2; | 514 | rbi = rbi2; |
559 | } | 515 | } |
560 | } | 516 | } |
561 | } | 517 | } |
562 | set_bit(STRIPE_OP_BIOFILL, &sh->ops.complete); | 518 | spin_unlock_irq(&conf->device_lock); |
519 | clear_bit(STRIPE_BIOFILL_RUN, &sh->state); | ||
563 | 520 | ||
564 | return_io(return_bi); | 521 | return_io(return_bi); |
565 | 522 | ||
@@ -610,13 +567,14 @@ static void ops_complete_compute5(void *stripe_head_ref) | |||
610 | set_bit(R5_UPTODATE, &tgt->flags); | 567 | set_bit(R5_UPTODATE, &tgt->flags); |
611 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); | 568 | BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); |
612 | clear_bit(R5_Wantcompute, &tgt->flags); | 569 | clear_bit(R5_Wantcompute, &tgt->flags); |
613 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | 570 | clear_bit(STRIPE_COMPUTE_RUN, &sh->state); |
571 | if (sh->check_state == check_state_compute_run) | ||
572 | sh->check_state = check_state_compute_result; | ||
614 | set_bit(STRIPE_HANDLE, &sh->state); | 573 | set_bit(STRIPE_HANDLE, &sh->state); |
615 | release_stripe(sh); | 574 | release_stripe(sh); |
616 | } | 575 | } |
617 | 576 | ||
618 | static struct dma_async_tx_descriptor * | 577 | static struct dma_async_tx_descriptor *ops_run_compute5(struct stripe_head *sh) |
619 | ops_run_compute5(struct stripe_head *sh, unsigned long pending) | ||
620 | { | 578 | { |
621 | /* kernel stack size limits the total number of disks */ | 579 | /* kernel stack size limits the total number of disks */ |
622 | int disks = sh->disks; | 580 | int disks = sh->disks; |
@@ -646,10 +604,6 @@ ops_run_compute5(struct stripe_head *sh, unsigned long pending) | |||
646 | ASYNC_TX_XOR_ZERO_DST, NULL, | 604 | ASYNC_TX_XOR_ZERO_DST, NULL, |
647 | ops_complete_compute5, sh); | 605 | ops_complete_compute5, sh); |
648 | 606 | ||
649 | /* ack now if postxor is not set to be run */ | ||
650 | if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending)) | ||
651 | async_tx_ack(tx); | ||
652 | |||
653 | return tx; | 607 | return tx; |
654 | } | 608 | } |
655 | 609 | ||
@@ -659,8 +613,6 @@ static void ops_complete_prexor(void *stripe_head_ref) | |||
659 | 613 | ||
660 | pr_debug("%s: stripe %llu\n", __func__, | 614 | pr_debug("%s: stripe %llu\n", __func__, |
661 | (unsigned long long)sh->sector); | 615 | (unsigned long long)sh->sector); |
662 | |||
663 | set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | ||
664 | } | 616 | } |
665 | 617 | ||
666 | static struct dma_async_tx_descriptor * | 618 | static struct dma_async_tx_descriptor * |
@@ -680,7 +632,7 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
680 | for (i = disks; i--; ) { | 632 | for (i = disks; i--; ) { |
681 | struct r5dev *dev = &sh->dev[i]; | 633 | struct r5dev *dev = &sh->dev[i]; |
682 | /* Only process blocks that are known to be uptodate */ | 634 | /* Only process blocks that are known to be uptodate */ |
683 | if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags)) | 635 | if (test_bit(R5_Wantdrain, &dev->flags)) |
684 | xor_srcs[count++] = dev->page; | 636 | xor_srcs[count++] = dev->page; |
685 | } | 637 | } |
686 | 638 | ||
@@ -692,16 +644,10 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) | |||
692 | } | 644 | } |
693 | 645 | ||
694 | static struct dma_async_tx_descriptor * | 646 | static struct dma_async_tx_descriptor * |
695 | ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | 647 | ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) |
696 | unsigned long pending) | ||
697 | { | 648 | { |
698 | int disks = sh->disks; | 649 | int disks = sh->disks; |
699 | int pd_idx = sh->pd_idx, i; | 650 | int i; |
700 | |||
701 | /* check if prexor is active which means only process blocks | ||
702 | * that are part of a read-modify-write (Wantprexor) | ||
703 | */ | ||
704 | int prexor = test_bit(STRIPE_OP_PREXOR, &pending); | ||
705 | 651 | ||
706 | pr_debug("%s: stripe %llu\n", __func__, | 652 | pr_debug("%s: stripe %llu\n", __func__, |
707 | (unsigned long long)sh->sector); | 653 | (unsigned long long)sh->sector); |
@@ -709,20 +655,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
709 | for (i = disks; i--; ) { | 655 | for (i = disks; i--; ) { |
710 | struct r5dev *dev = &sh->dev[i]; | 656 | struct r5dev *dev = &sh->dev[i]; |
711 | struct bio *chosen; | 657 | struct bio *chosen; |
712 | int towrite; | ||
713 | |||
714 | towrite = 0; | ||
715 | if (prexor) { /* rmw */ | ||
716 | if (dev->towrite && | ||
717 | test_bit(R5_Wantprexor, &dev->flags)) | ||
718 | towrite = 1; | ||
719 | } else { /* rcw */ | ||
720 | if (i != pd_idx && dev->towrite && | ||
721 | test_bit(R5_LOCKED, &dev->flags)) | ||
722 | towrite = 1; | ||
723 | } | ||
724 | 658 | ||
725 | if (towrite) { | 659 | if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { |
726 | struct bio *wbi; | 660 | struct bio *wbi; |
727 | 661 | ||
728 | spin_lock(&sh->lock); | 662 | spin_lock(&sh->lock); |
@@ -747,18 +681,6 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
747 | static void ops_complete_postxor(void *stripe_head_ref) | 681 | static void ops_complete_postxor(void *stripe_head_ref) |
748 | { | 682 | { |
749 | struct stripe_head *sh = stripe_head_ref; | 683 | struct stripe_head *sh = stripe_head_ref; |
750 | |||
751 | pr_debug("%s: stripe %llu\n", __func__, | ||
752 | (unsigned long long)sh->sector); | ||
753 | |||
754 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | ||
755 | set_bit(STRIPE_HANDLE, &sh->state); | ||
756 | release_stripe(sh); | ||
757 | } | ||
758 | |||
759 | static void ops_complete_write(void *stripe_head_ref) | ||
760 | { | ||
761 | struct stripe_head *sh = stripe_head_ref; | ||
762 | int disks = sh->disks, i, pd_idx = sh->pd_idx; | 684 | int disks = sh->disks, i, pd_idx = sh->pd_idx; |
763 | 685 | ||
764 | pr_debug("%s: stripe %llu\n", __func__, | 686 | pr_debug("%s: stripe %llu\n", __func__, |
@@ -770,16 +692,21 @@ static void ops_complete_write(void *stripe_head_ref) | |||
770 | set_bit(R5_UPTODATE, &dev->flags); | 692 | set_bit(R5_UPTODATE, &dev->flags); |
771 | } | 693 | } |
772 | 694 | ||
773 | set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); | 695 | if (sh->reconstruct_state == reconstruct_state_drain_run) |
774 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | 696 | sh->reconstruct_state = reconstruct_state_drain_result; |
697 | else if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) | ||
698 | sh->reconstruct_state = reconstruct_state_prexor_drain_result; | ||
699 | else { | ||
700 | BUG_ON(sh->reconstruct_state != reconstruct_state_run); | ||
701 | sh->reconstruct_state = reconstruct_state_result; | ||
702 | } | ||
775 | 703 | ||
776 | set_bit(STRIPE_HANDLE, &sh->state); | 704 | set_bit(STRIPE_HANDLE, &sh->state); |
777 | release_stripe(sh); | 705 | release_stripe(sh); |
778 | } | 706 | } |
779 | 707 | ||
780 | static void | 708 | static void |
781 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | 709 | ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) |
782 | unsigned long pending) | ||
783 | { | 710 | { |
784 | /* kernel stack size limits the total number of disks */ | 711 | /* kernel stack size limits the total number of disks */ |
785 | int disks = sh->disks; | 712 | int disks = sh->disks; |
@@ -787,9 +714,8 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
787 | 714 | ||
788 | int count = 0, pd_idx = sh->pd_idx, i; | 715 | int count = 0, pd_idx = sh->pd_idx, i; |
789 | struct page *xor_dest; | 716 | struct page *xor_dest; |
790 | int prexor = test_bit(STRIPE_OP_PREXOR, &pending); | 717 | int prexor = 0; |
791 | unsigned long flags; | 718 | unsigned long flags; |
792 | dma_async_tx_callback callback; | ||
793 | 719 | ||
794 | pr_debug("%s: stripe %llu\n", __func__, | 720 | pr_debug("%s: stripe %llu\n", __func__, |
795 | (unsigned long long)sh->sector); | 721 | (unsigned long long)sh->sector); |
@@ -797,7 +723,8 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
797 | /* check if prexor is active which means only process blocks | 723 | /* check if prexor is active which means only process blocks |
798 | * that are part of a read-modify-write (written) | 724 | * that are part of a read-modify-write (written) |
799 | */ | 725 | */ |
800 | if (prexor) { | 726 | if (sh->reconstruct_state == reconstruct_state_prexor_drain_run) { |
727 | prexor = 1; | ||
801 | xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; | 728 | xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; |
802 | for (i = disks; i--; ) { | 729 | for (i = disks; i--; ) { |
803 | struct r5dev *dev = &sh->dev[i]; | 730 | struct r5dev *dev = &sh->dev[i]; |
@@ -813,10 +740,6 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
813 | } | 740 | } |
814 | } | 741 | } |
815 | 742 | ||
816 | /* check whether this postxor is part of a write */ | ||
817 | callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ? | ||
818 | ops_complete_write : ops_complete_postxor; | ||
819 | |||
820 | /* 1/ if we prexor'd then the dest is reused as a source | 743 | /* 1/ if we prexor'd then the dest is reused as a source |
821 | * 2/ if we did not prexor then we are redoing the parity | 744 | * 2/ if we did not prexor then we are redoing the parity |
822 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST | 745 | * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST |
@@ -830,25 +753,20 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx, | |||
830 | if (unlikely(count == 1)) { | 753 | if (unlikely(count == 1)) { |
831 | flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); | 754 | flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); |
832 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, | 755 | tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, |
833 | flags, tx, callback, sh); | 756 | flags, tx, ops_complete_postxor, sh); |
834 | } else | 757 | } else |
835 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 758 | tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, |
836 | flags, tx, callback, sh); | 759 | flags, tx, ops_complete_postxor, sh); |
837 | } | 760 | } |
838 | 761 | ||
839 | static void ops_complete_check(void *stripe_head_ref) | 762 | static void ops_complete_check(void *stripe_head_ref) |
840 | { | 763 | { |
841 | struct stripe_head *sh = stripe_head_ref; | 764 | struct stripe_head *sh = stripe_head_ref; |
842 | int pd_idx = sh->pd_idx; | ||
843 | 765 | ||
844 | pr_debug("%s: stripe %llu\n", __func__, | 766 | pr_debug("%s: stripe %llu\n", __func__, |
845 | (unsigned long long)sh->sector); | 767 | (unsigned long long)sh->sector); |
846 | 768 | ||
847 | if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && | 769 | sh->check_state = check_state_check_result; |
848 | sh->ops.zero_sum_result == 0) | ||
849 | set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | ||
850 | |||
851 | set_bit(STRIPE_OP_CHECK, &sh->ops.complete); | ||
852 | set_bit(STRIPE_HANDLE, &sh->state); | 770 | set_bit(STRIPE_HANDLE, &sh->state); |
853 | release_stripe(sh); | 771 | release_stripe(sh); |
854 | } | 772 | } |
@@ -875,46 +793,42 @@ static void ops_run_check(struct stripe_head *sh) | |||
875 | tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, | 793 | tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, |
876 | &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); | 794 | &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); |
877 | 795 | ||
878 | if (tx) | ||
879 | set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); | ||
880 | else | ||
881 | clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); | ||
882 | |||
883 | atomic_inc(&sh->count); | 796 | atomic_inc(&sh->count); |
884 | tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, | 797 | tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, |
885 | ops_complete_check, sh); | 798 | ops_complete_check, sh); |
886 | } | 799 | } |
887 | 800 | ||
888 | static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) | 801 | static void raid5_run_ops(struct stripe_head *sh, unsigned long ops_request) |
889 | { | 802 | { |
890 | int overlap_clear = 0, i, disks = sh->disks; | 803 | int overlap_clear = 0, i, disks = sh->disks; |
891 | struct dma_async_tx_descriptor *tx = NULL; | 804 | struct dma_async_tx_descriptor *tx = NULL; |
892 | 805 | ||
893 | if (test_bit(STRIPE_OP_BIOFILL, &pending)) { | 806 | if (test_bit(STRIPE_OP_BIOFILL, &ops_request)) { |
894 | ops_run_biofill(sh); | 807 | ops_run_biofill(sh); |
895 | overlap_clear++; | 808 | overlap_clear++; |
896 | } | 809 | } |
897 | 810 | ||
898 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending)) | 811 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &ops_request)) { |
899 | tx = ops_run_compute5(sh, pending); | 812 | tx = ops_run_compute5(sh); |
813 | /* terminate the chain if postxor is not set to be run */ | ||
814 | if (tx && !test_bit(STRIPE_OP_POSTXOR, &ops_request)) | ||
815 | async_tx_ack(tx); | ||
816 | } | ||
900 | 817 | ||
901 | if (test_bit(STRIPE_OP_PREXOR, &pending)) | 818 | if (test_bit(STRIPE_OP_PREXOR, &ops_request)) |
902 | tx = ops_run_prexor(sh, tx); | 819 | tx = ops_run_prexor(sh, tx); |
903 | 820 | ||
904 | if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { | 821 | if (test_bit(STRIPE_OP_BIODRAIN, &ops_request)) { |
905 | tx = ops_run_biodrain(sh, tx, pending); | 822 | tx = ops_run_biodrain(sh, tx); |
906 | overlap_clear++; | 823 | overlap_clear++; |
907 | } | 824 | } |
908 | 825 | ||
909 | if (test_bit(STRIPE_OP_POSTXOR, &pending)) | 826 | if (test_bit(STRIPE_OP_POSTXOR, &ops_request)) |
910 | ops_run_postxor(sh, tx, pending); | 827 | ops_run_postxor(sh, tx); |
911 | 828 | ||
912 | if (test_bit(STRIPE_OP_CHECK, &pending)) | 829 | if (test_bit(STRIPE_OP_CHECK, &ops_request)) |
913 | ops_run_check(sh); | 830 | ops_run_check(sh); |
914 | 831 | ||
915 | if (test_bit(STRIPE_OP_IO, &pending)) | ||
916 | ops_run_io(sh); | ||
917 | |||
918 | if (overlap_clear) | 832 | if (overlap_clear) |
919 | for (i = disks; i--; ) { | 833 | for (i = disks; i--; ) { |
920 | struct r5dev *dev = &sh->dev[i]; | 834 | struct r5dev *dev = &sh->dev[i]; |
@@ -997,14 +911,16 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) | |||
997 | struct stripe_head *osh, *nsh; | 911 | struct stripe_head *osh, *nsh; |
998 | LIST_HEAD(newstripes); | 912 | LIST_HEAD(newstripes); |
999 | struct disk_info *ndisks; | 913 | struct disk_info *ndisks; |
1000 | int err = 0; | 914 | int err; |
1001 | struct kmem_cache *sc; | 915 | struct kmem_cache *sc; |
1002 | int i; | 916 | int i; |
1003 | 917 | ||
1004 | if (newsize <= conf->pool_size) | 918 | if (newsize <= conf->pool_size) |
1005 | return 0; /* never bother to shrink */ | 919 | return 0; /* never bother to shrink */ |
1006 | 920 | ||
1007 | md_allow_write(conf->mddev); | 921 | err = md_allow_write(conf->mddev); |
922 | if (err) | ||
923 | return err; | ||
1008 | 924 | ||
1009 | /* Step 1 */ | 925 | /* Step 1 */ |
1010 | sc = kmem_cache_create(conf->cache_name[1-conf->active_name], | 926 | sc = kmem_cache_create(conf->cache_name[1-conf->active_name], |
@@ -1703,11 +1619,11 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2) | |||
1703 | } | 1619 | } |
1704 | } | 1620 | } |
1705 | 1621 | ||
1706 | static int | 1622 | static void |
1707 | handle_write_operations5(struct stripe_head *sh, int rcw, int expand) | 1623 | schedule_reconstruction5(struct stripe_head *sh, struct stripe_head_state *s, |
1624 | int rcw, int expand) | ||
1708 | { | 1625 | { |
1709 | int i, pd_idx = sh->pd_idx, disks = sh->disks; | 1626 | int i, pd_idx = sh->pd_idx, disks = sh->disks; |
1710 | int locked = 0; | ||
1711 | 1627 | ||
1712 | if (rcw) { | 1628 | if (rcw) { |
1713 | /* if we are not expanding this is a proper write request, and | 1629 | /* if we are not expanding this is a proper write request, and |
@@ -1715,53 +1631,48 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand) | |||
1715 | * stripe cache | 1631 | * stripe cache |
1716 | */ | 1632 | */ |
1717 | if (!expand) { | 1633 | if (!expand) { |
1718 | set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); | 1634 | sh->reconstruct_state = reconstruct_state_drain_run; |
1719 | sh->ops.count++; | 1635 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); |
1720 | } | 1636 | } else |
1637 | sh->reconstruct_state = reconstruct_state_run; | ||
1721 | 1638 | ||
1722 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | 1639 | set_bit(STRIPE_OP_POSTXOR, &s->ops_request); |
1723 | sh->ops.count++; | ||
1724 | 1640 | ||
1725 | for (i = disks; i--; ) { | 1641 | for (i = disks; i--; ) { |
1726 | struct r5dev *dev = &sh->dev[i]; | 1642 | struct r5dev *dev = &sh->dev[i]; |
1727 | 1643 | ||
1728 | if (dev->towrite) { | 1644 | if (dev->towrite) { |
1729 | set_bit(R5_LOCKED, &dev->flags); | 1645 | set_bit(R5_LOCKED, &dev->flags); |
1646 | set_bit(R5_Wantdrain, &dev->flags); | ||
1730 | if (!expand) | 1647 | if (!expand) |
1731 | clear_bit(R5_UPTODATE, &dev->flags); | 1648 | clear_bit(R5_UPTODATE, &dev->flags); |
1732 | locked++; | 1649 | s->locked++; |
1733 | } | 1650 | } |
1734 | } | 1651 | } |
1735 | if (locked + 1 == disks) | 1652 | if (s->locked + 1 == disks) |
1736 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) | 1653 | if (!test_and_set_bit(STRIPE_FULL_WRITE, &sh->state)) |
1737 | atomic_inc(&sh->raid_conf->pending_full_writes); | 1654 | atomic_inc(&sh->raid_conf->pending_full_writes); |
1738 | } else { | 1655 | } else { |
1739 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || | 1656 | BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || |
1740 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); | 1657 | test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); |
1741 | 1658 | ||
1742 | set_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 1659 | sh->reconstruct_state = reconstruct_state_prexor_drain_run; |
1743 | set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); | 1660 | set_bit(STRIPE_OP_PREXOR, &s->ops_request); |
1744 | set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | 1661 | set_bit(STRIPE_OP_BIODRAIN, &s->ops_request); |
1745 | 1662 | set_bit(STRIPE_OP_POSTXOR, &s->ops_request); | |
1746 | sh->ops.count += 3; | ||
1747 | 1663 | ||
1748 | for (i = disks; i--; ) { | 1664 | for (i = disks; i--; ) { |
1749 | struct r5dev *dev = &sh->dev[i]; | 1665 | struct r5dev *dev = &sh->dev[i]; |
1750 | if (i == pd_idx) | 1666 | if (i == pd_idx) |
1751 | continue; | 1667 | continue; |
1752 | 1668 | ||
1753 | /* For a read-modify write there may be blocks that are | ||
1754 | * locked for reading while others are ready to be | ||
1755 | * written so we distinguish these blocks by the | ||
1756 | * R5_Wantprexor bit | ||
1757 | */ | ||
1758 | if (dev->towrite && | 1669 | if (dev->towrite && |
1759 | (test_bit(R5_UPTODATE, &dev->flags) || | 1670 | (test_bit(R5_UPTODATE, &dev->flags) || |
1760 | test_bit(R5_Wantcompute, &dev->flags))) { | 1671 | test_bit(R5_Wantcompute, &dev->flags))) { |
1761 | set_bit(R5_Wantprexor, &dev->flags); | 1672 | set_bit(R5_Wantdrain, &dev->flags); |
1762 | set_bit(R5_LOCKED, &dev->flags); | 1673 | set_bit(R5_LOCKED, &dev->flags); |
1763 | clear_bit(R5_UPTODATE, &dev->flags); | 1674 | clear_bit(R5_UPTODATE, &dev->flags); |
1764 | locked++; | 1675 | s->locked++; |
1765 | } | 1676 | } |
1766 | } | 1677 | } |
1767 | } | 1678 | } |
@@ -1771,13 +1682,11 @@ handle_write_operations5(struct stripe_head *sh, int rcw, int expand) | |||
1771 | */ | 1682 | */ |
1772 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); | 1683 | set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); |
1773 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); | 1684 | clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); |
1774 | locked++; | 1685 | s->locked++; |
1775 | 1686 | ||
1776 | pr_debug("%s: stripe %llu locked: %d pending: %lx\n", | 1687 | pr_debug("%s: stripe %llu locked: %d ops_request: %lx\n", |
1777 | __func__, (unsigned long long)sh->sector, | 1688 | __func__, (unsigned long long)sh->sector, |
1778 | locked, sh->ops.pending); | 1689 | s->locked, s->ops_request); |
1779 | |||
1780 | return locked; | ||
1781 | } | 1690 | } |
1782 | 1691 | ||
1783 | /* | 1692 | /* |
@@ -1876,7 +1785,7 @@ static int stripe_to_pdidx(sector_t stripe, raid5_conf_t *conf, int disks) | |||
1876 | } | 1785 | } |
1877 | 1786 | ||
1878 | static void | 1787 | static void |
1879 | handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, | 1788 | handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, |
1880 | struct stripe_head_state *s, int disks, | 1789 | struct stripe_head_state *s, int disks, |
1881 | struct bio **return_bi) | 1790 | struct bio **return_bi) |
1882 | { | 1791 | { |
@@ -1967,48 +1876,38 @@ handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, | |||
1967 | md_wakeup_thread(conf->mddev->thread); | 1876 | md_wakeup_thread(conf->mddev->thread); |
1968 | } | 1877 | } |
1969 | 1878 | ||
1970 | /* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks | 1879 | /* fetch_block5 - checks the given member device to see if its data needs |
1971 | * to process | 1880 | * to be read or computed to satisfy a request. |
1881 | * | ||
1882 | * Returns 1 when no more member devices need to be checked, otherwise returns | ||
1883 | * 0 to tell the loop in handle_stripe_fill5 to continue | ||
1972 | */ | 1884 | */ |
1973 | static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | 1885 | static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, |
1974 | struct stripe_head_state *s, int disk_idx, int disks) | 1886 | int disk_idx, int disks) |
1975 | { | 1887 | { |
1976 | struct r5dev *dev = &sh->dev[disk_idx]; | 1888 | struct r5dev *dev = &sh->dev[disk_idx]; |
1977 | struct r5dev *failed_dev = &sh->dev[s->failed_num]; | 1889 | struct r5dev *failed_dev = &sh->dev[s->failed_num]; |
1978 | 1890 | ||
1979 | /* don't schedule compute operations or reads on the parity block while | ||
1980 | * a check is in flight | ||
1981 | */ | ||
1982 | if ((disk_idx == sh->pd_idx) && | ||
1983 | test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) | ||
1984 | return ~0; | ||
1985 | |||
1986 | /* is the data in this block needed, and can we get it? */ | 1891 | /* is the data in this block needed, and can we get it? */ |
1987 | if (!test_bit(R5_LOCKED, &dev->flags) && | 1892 | if (!test_bit(R5_LOCKED, &dev->flags) && |
1988 | !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || | 1893 | !test_bit(R5_UPTODATE, &dev->flags) && |
1989 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || | 1894 | (dev->toread || |
1990 | s->syncing || s->expanding || (s->failed && | 1895 | (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || |
1991 | (failed_dev->toread || (failed_dev->towrite && | 1896 | s->syncing || s->expanding || |
1992 | !test_bit(R5_OVERWRITE, &failed_dev->flags) | 1897 | (s->failed && |
1993 | ))))) { | 1898 | (failed_dev->toread || |
1994 | /* 1/ We would like to get this block, possibly by computing it, | 1899 | (failed_dev->towrite && |
1995 | * but we might not be able to. | 1900 | !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) { |
1996 | * | 1901 | /* We would like to get this block, possibly by computing it, |
1997 | * 2/ Since parity check operations potentially make the parity | 1902 | * otherwise read it if the backing disk is insync |
1998 | * block !uptodate it will need to be refreshed before any | ||
1999 | * compute operations on data disks are scheduled. | ||
2000 | * | ||
2001 | * 3/ We hold off parity block re-reads until check operations | ||
2002 | * have quiesced. | ||
2003 | */ | 1903 | */ |
2004 | if ((s->uptodate == disks - 1) && | 1904 | if ((s->uptodate == disks - 1) && |
2005 | (s->failed && disk_idx == s->failed_num) && | 1905 | (s->failed && disk_idx == s->failed_num)) { |
2006 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { | 1906 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); |
2007 | set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | 1907 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); |
2008 | set_bit(R5_Wantcompute, &dev->flags); | 1908 | set_bit(R5_Wantcompute, &dev->flags); |
2009 | sh->ops.target = disk_idx; | 1909 | sh->ops.target = disk_idx; |
2010 | s->req_compute = 1; | 1910 | s->req_compute = 1; |
2011 | sh->ops.count++; | ||
2012 | /* Careful: from this point on 'uptodate' is in the eye | 1911 | /* Careful: from this point on 'uptodate' is in the eye |
2013 | * of raid5_run_ops which services 'compute' operations | 1912 | * of raid5_run_ops which services 'compute' operations |
2014 | * before writes. R5_Wantcompute flags a block that will | 1913 | * before writes. R5_Wantcompute flags a block that will |
@@ -2016,53 +1915,40 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, | |||
2016 | * subsequent operation. | 1915 | * subsequent operation. |
2017 | */ | 1916 | */ |
2018 | s->uptodate++; | 1917 | s->uptodate++; |
2019 | return 0; /* uptodate + compute == disks */ | 1918 | return 1; /* uptodate + compute == disks */ |
2020 | } else if (test_bit(R5_Insync, &dev->flags)) { | 1919 | } else if (test_bit(R5_Insync, &dev->flags)) { |
2021 | set_bit(R5_LOCKED, &dev->flags); | 1920 | set_bit(R5_LOCKED, &dev->flags); |
2022 | set_bit(R5_Wantread, &dev->flags); | 1921 | set_bit(R5_Wantread, &dev->flags); |
2023 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
2024 | sh->ops.count++; | ||
2025 | s->locked++; | 1922 | s->locked++; |
2026 | pr_debug("Reading block %d (sync=%d)\n", disk_idx, | 1923 | pr_debug("Reading block %d (sync=%d)\n", disk_idx, |
2027 | s->syncing); | 1924 | s->syncing); |
2028 | } | 1925 | } |
2029 | } | 1926 | } |
2030 | 1927 | ||
2031 | return ~0; | 1928 | return 0; |
2032 | } | 1929 | } |
2033 | 1930 | ||
2034 | static void handle_issuing_new_read_requests5(struct stripe_head *sh, | 1931 | /** |
1932 | * handle_stripe_fill5 - read or compute data to satisfy pending requests. | ||
1933 | */ | ||
1934 | static void handle_stripe_fill5(struct stripe_head *sh, | ||
2035 | struct stripe_head_state *s, int disks) | 1935 | struct stripe_head_state *s, int disks) |
2036 | { | 1936 | { |
2037 | int i; | 1937 | int i; |
2038 | 1938 | ||
2039 | /* Clear completed compute operations. Parity recovery | ||
2040 | * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled | ||
2041 | * later on in this routine | ||
2042 | */ | ||
2043 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | ||
2044 | !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2045 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | ||
2046 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | ||
2047 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | ||
2048 | } | ||
2049 | |||
2050 | /* look for blocks to read/compute, skip this if a compute | 1939 | /* look for blocks to read/compute, skip this if a compute |
2051 | * is already in flight, or if the stripe contents are in the | 1940 | * is already in flight, or if the stripe contents are in the |
2052 | * midst of changing due to a write | 1941 | * midst of changing due to a write |
2053 | */ | 1942 | */ |
2054 | if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && | 1943 | if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && |
2055 | !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) && | 1944 | !sh->reconstruct_state) |
2056 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { | ||
2057 | for (i = disks; i--; ) | 1945 | for (i = disks; i--; ) |
2058 | if (__handle_issuing_new_read_requests5( | 1946 | if (fetch_block5(sh, s, i, disks)) |
2059 | sh, s, i, disks) == 0) | ||
2060 | break; | 1947 | break; |
2061 | } | ||
2062 | set_bit(STRIPE_HANDLE, &sh->state); | 1948 | set_bit(STRIPE_HANDLE, &sh->state); |
2063 | } | 1949 | } |
2064 | 1950 | ||
2065 | static void handle_issuing_new_read_requests6(struct stripe_head *sh, | 1951 | static void handle_stripe_fill6(struct stripe_head *sh, |
2066 | struct stripe_head_state *s, struct r6_state *r6s, | 1952 | struct stripe_head_state *s, struct r6_state *r6s, |
2067 | int disks) | 1953 | int disks) |
2068 | { | 1954 | { |
@@ -2121,12 +2007,12 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, | |||
2121 | } | 2007 | } |
2122 | 2008 | ||
2123 | 2009 | ||
2124 | /* handle_completed_write_requests | 2010 | /* handle_stripe_clean_event |
2125 | * any written block on an uptodate or failed drive can be returned. | 2011 | * any written block on an uptodate or failed drive can be returned. |
2126 | * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but | 2012 | * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but |
2127 | * never LOCKED, so we don't need to test 'failed' directly. | 2013 | * never LOCKED, so we don't need to test 'failed' directly. |
2128 | */ | 2014 | */ |
2129 | static void handle_completed_write_requests(raid5_conf_t *conf, | 2015 | static void handle_stripe_clean_event(raid5_conf_t *conf, |
2130 | struct stripe_head *sh, int disks, struct bio **return_bi) | 2016 | struct stripe_head *sh, int disks, struct bio **return_bi) |
2131 | { | 2017 | { |
2132 | int i; | 2018 | int i; |
@@ -2171,7 +2057,7 @@ static void handle_completed_write_requests(raid5_conf_t *conf, | |||
2171 | md_wakeup_thread(conf->mddev->thread); | 2057 | md_wakeup_thread(conf->mddev->thread); |
2172 | } | 2058 | } |
2173 | 2059 | ||
2174 | static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | 2060 | static void handle_stripe_dirtying5(raid5_conf_t *conf, |
2175 | struct stripe_head *sh, struct stripe_head_state *s, int disks) | 2061 | struct stripe_head *sh, struct stripe_head_state *s, int disks) |
2176 | { | 2062 | { |
2177 | int rmw = 0, rcw = 0, i; | 2063 | int rmw = 0, rcw = 0, i; |
@@ -2215,9 +2101,6 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2215 | "%d for r-m-w\n", i); | 2101 | "%d for r-m-w\n", i); |
2216 | set_bit(R5_LOCKED, &dev->flags); | 2102 | set_bit(R5_LOCKED, &dev->flags); |
2217 | set_bit(R5_Wantread, &dev->flags); | 2103 | set_bit(R5_Wantread, &dev->flags); |
2218 | if (!test_and_set_bit( | ||
2219 | STRIPE_OP_IO, &sh->ops.pending)) | ||
2220 | sh->ops.count++; | ||
2221 | s->locked++; | 2104 | s->locked++; |
2222 | } else { | 2105 | } else { |
2223 | set_bit(STRIPE_DELAYED, &sh->state); | 2106 | set_bit(STRIPE_DELAYED, &sh->state); |
@@ -2241,9 +2124,6 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2241 | "%d for Reconstruct\n", i); | 2124 | "%d for Reconstruct\n", i); |
2242 | set_bit(R5_LOCKED, &dev->flags); | 2125 | set_bit(R5_LOCKED, &dev->flags); |
2243 | set_bit(R5_Wantread, &dev->flags); | 2126 | set_bit(R5_Wantread, &dev->flags); |
2244 | if (!test_and_set_bit( | ||
2245 | STRIPE_OP_IO, &sh->ops.pending)) | ||
2246 | sh->ops.count++; | ||
2247 | s->locked++; | 2127 | s->locked++; |
2248 | } else { | 2128 | } else { |
2249 | set_bit(STRIPE_DELAYED, &sh->state); | 2129 | set_bit(STRIPE_DELAYED, &sh->state); |
@@ -2261,14 +2141,13 @@ static void handle_issuing_new_write_requests5(raid5_conf_t *conf, | |||
2261 | * simultaneously. If this is not the case then new writes need to be | 2141 | * simultaneously. If this is not the case then new writes need to be |
2262 | * held off until the compute completes. | 2142 | * held off until the compute completes. |
2263 | */ | 2143 | */ |
2264 | if ((s->req_compute || | 2144 | if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && |
2265 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) && | 2145 | (s->locked == 0 && (rcw == 0 || rmw == 0) && |
2266 | (s->locked == 0 && (rcw == 0 || rmw == 0) && | 2146 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) |
2267 | !test_bit(STRIPE_BIT_DELAY, &sh->state))) | 2147 | schedule_reconstruction5(sh, s, rcw == 0, 0); |
2268 | s->locked += handle_write_operations5(sh, rcw == 0, 0); | ||
2269 | } | 2148 | } |
2270 | 2149 | ||
2271 | static void handle_issuing_new_write_requests6(raid5_conf_t *conf, | 2150 | static void handle_stripe_dirtying6(raid5_conf_t *conf, |
2272 | struct stripe_head *sh, struct stripe_head_state *s, | 2151 | struct stripe_head *sh, struct stripe_head_state *s, |
2273 | struct r6_state *r6s, int disks) | 2152 | struct r6_state *r6s, int disks) |
2274 | { | 2153 | { |
@@ -2371,92 +2250,86 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf, | |||
2371 | static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, | 2250 | static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, |
2372 | struct stripe_head_state *s, int disks) | 2251 | struct stripe_head_state *s, int disks) |
2373 | { | 2252 | { |
2374 | int canceled_check = 0; | 2253 | struct r5dev *dev = NULL; |
2375 | 2254 | ||
2376 | set_bit(STRIPE_HANDLE, &sh->state); | 2255 | set_bit(STRIPE_HANDLE, &sh->state); |
2377 | 2256 | ||
2378 | /* complete a check operation */ | 2257 | switch (sh->check_state) { |
2379 | if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { | 2258 | case check_state_idle: |
2380 | clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); | 2259 | /* start a new check operation if there are no failures */ |
2381 | clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); | ||
2382 | if (s->failed == 0) { | 2260 | if (s->failed == 0) { |
2383 | if (sh->ops.zero_sum_result == 0) | ||
2384 | /* parity is correct (on disc, | ||
2385 | * not in buffer any more) | ||
2386 | */ | ||
2387 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2388 | else { | ||
2389 | conf->mddev->resync_mismatches += | ||
2390 | STRIPE_SECTORS; | ||
2391 | if (test_bit( | ||
2392 | MD_RECOVERY_CHECK, &conf->mddev->recovery)) | ||
2393 | /* don't try to repair!! */ | ||
2394 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2395 | else { | ||
2396 | set_bit(STRIPE_OP_COMPUTE_BLK, | ||
2397 | &sh->ops.pending); | ||
2398 | set_bit(STRIPE_OP_MOD_REPAIR_PD, | ||
2399 | &sh->ops.pending); | ||
2400 | set_bit(R5_Wantcompute, | ||
2401 | &sh->dev[sh->pd_idx].flags); | ||
2402 | sh->ops.target = sh->pd_idx; | ||
2403 | sh->ops.count++; | ||
2404 | s->uptodate++; | ||
2405 | } | ||
2406 | } | ||
2407 | } else | ||
2408 | canceled_check = 1; /* STRIPE_INSYNC is not set */ | ||
2409 | } | ||
2410 | |||
2411 | /* start a new check operation if there are no failures, the stripe is | ||
2412 | * not insync, and a repair is not in flight | ||
2413 | */ | ||
2414 | if (s->failed == 0 && | ||
2415 | !test_bit(STRIPE_INSYNC, &sh->state) && | ||
2416 | !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | ||
2417 | if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { | ||
2418 | BUG_ON(s->uptodate != disks); | 2261 | BUG_ON(s->uptodate != disks); |
2262 | sh->check_state = check_state_run; | ||
2263 | set_bit(STRIPE_OP_CHECK, &s->ops_request); | ||
2419 | clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); | 2264 | clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); |
2420 | sh->ops.count++; | ||
2421 | s->uptodate--; | 2265 | s->uptodate--; |
2266 | break; | ||
2422 | } | 2267 | } |
2423 | } | 2268 | dev = &sh->dev[s->failed_num]; |
2424 | 2269 | /* fall through */ | |
2425 | /* check if we can clear a parity disk reconstruct */ | 2270 | case check_state_compute_result: |
2426 | if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && | 2271 | sh->check_state = check_state_idle; |
2427 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { | 2272 | if (!dev) |
2428 | 2273 | dev = &sh->dev[sh->pd_idx]; | |
2429 | clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); | 2274 | |
2430 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); | 2275 | /* check that a write has not made the stripe insync */ |
2431 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); | 2276 | if (test_bit(STRIPE_INSYNC, &sh->state)) |
2432 | clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); | 2277 | break; |
2433 | } | ||
2434 | |||
2435 | 2278 | ||
2436 | /* Wait for check parity and compute block operations to complete | ||
2437 | * before write-back. If a failure occurred while the check operation | ||
2438 | * was in flight we need to cycle this stripe through handle_stripe | ||
2439 | * since the parity block may not be uptodate | ||
2440 | */ | ||
2441 | if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) && | ||
2442 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && | ||
2443 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { | ||
2444 | struct r5dev *dev; | ||
2445 | /* either failed parity check, or recovery is happening */ | 2279 | /* either failed parity check, or recovery is happening */ |
2446 | if (s->failed == 0) | ||
2447 | s->failed_num = sh->pd_idx; | ||
2448 | dev = &sh->dev[s->failed_num]; | ||
2449 | BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); | 2280 | BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); |
2450 | BUG_ON(s->uptodate != disks); | 2281 | BUG_ON(s->uptodate != disks); |
2451 | 2282 | ||
2452 | set_bit(R5_LOCKED, &dev->flags); | 2283 | set_bit(R5_LOCKED, &dev->flags); |
2284 | s->locked++; | ||
2453 | set_bit(R5_Wantwrite, &dev->flags); | 2285 | set_bit(R5_Wantwrite, &dev->flags); |
2454 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
2455 | sh->ops.count++; | ||
2456 | 2286 | ||
2457 | clear_bit(STRIPE_DEGRADED, &sh->state); | 2287 | clear_bit(STRIPE_DEGRADED, &sh->state); |
2458 | s->locked++; | ||
2459 | set_bit(STRIPE_INSYNC, &sh->state); | 2288 | set_bit(STRIPE_INSYNC, &sh->state); |
2289 | break; | ||
2290 | case check_state_run: | ||
2291 | break; /* we will be called again upon completion */ | ||
2292 | case check_state_check_result: | ||
2293 | sh->check_state = check_state_idle; | ||
2294 | |||
2295 | /* if a failure occurred during the check operation, leave | ||
2296 | * STRIPE_INSYNC not set and let the stripe be handled again | ||
2297 | */ | ||
2298 | if (s->failed) | ||
2299 | break; | ||
2300 | |||
2301 | /* handle a successful check operation, if parity is correct | ||
2302 | * we are done. Otherwise update the mismatch count and repair | ||
2303 | * parity if !MD_RECOVERY_CHECK | ||
2304 | */ | ||
2305 | if (sh->ops.zero_sum_result == 0) | ||
2306 | /* parity is correct (on disc, | ||
2307 | * not in buffer any more) | ||
2308 | */ | ||
2309 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2310 | else { | ||
2311 | conf->mddev->resync_mismatches += STRIPE_SECTORS; | ||
2312 | if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) | ||
2313 | /* don't try to repair!! */ | ||
2314 | set_bit(STRIPE_INSYNC, &sh->state); | ||
2315 | else { | ||
2316 | sh->check_state = check_state_compute_run; | ||
2317 | set_bit(STRIPE_COMPUTE_RUN, &sh->state); | ||
2318 | set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); | ||
2319 | set_bit(R5_Wantcompute, | ||
2320 | &sh->dev[sh->pd_idx].flags); | ||
2321 | sh->ops.target = sh->pd_idx; | ||
2322 | s->uptodate++; | ||
2323 | } | ||
2324 | } | ||
2325 | break; | ||
2326 | case check_state_compute_run: | ||
2327 | break; | ||
2328 | default: | ||
2329 | printk(KERN_ERR "%s: unknown check_state: %d sector: %llu\n", | ||
2330 | __func__, sh->check_state, | ||
2331 | (unsigned long long) sh->sector); | ||
2332 | BUG(); | ||
2460 | } | 2333 | } |
2461 | } | 2334 | } |
2462 | 2335 | ||
@@ -2641,15 +2514,14 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2641 | struct bio *return_bi = NULL; | 2514 | struct bio *return_bi = NULL; |
2642 | struct stripe_head_state s; | 2515 | struct stripe_head_state s; |
2643 | struct r5dev *dev; | 2516 | struct r5dev *dev; |
2644 | unsigned long pending = 0; | ||
2645 | mdk_rdev_t *blocked_rdev = NULL; | 2517 | mdk_rdev_t *blocked_rdev = NULL; |
2646 | int prexor; | 2518 | int prexor; |
2647 | 2519 | ||
2648 | memset(&s, 0, sizeof(s)); | 2520 | memset(&s, 0, sizeof(s)); |
2649 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " | 2521 | pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d " |
2650 | "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state, | 2522 | "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, |
2651 | atomic_read(&sh->count), sh->pd_idx, | 2523 | atomic_read(&sh->count), sh->pd_idx, sh->check_state, |
2652 | sh->ops.pending, sh->ops.ack, sh->ops.complete); | 2524 | sh->reconstruct_state); |
2653 | 2525 | ||
2654 | spin_lock(&sh->lock); | 2526 | spin_lock(&sh->lock); |
2655 | clear_bit(STRIPE_HANDLE, &sh->state); | 2527 | clear_bit(STRIPE_HANDLE, &sh->state); |
@@ -2658,15 +2530,8 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2658 | s.syncing = test_bit(STRIPE_SYNCING, &sh->state); | 2530 | s.syncing = test_bit(STRIPE_SYNCING, &sh->state); |
2659 | s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); | 2531 | s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); |
2660 | s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); | 2532 | s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); |
2661 | /* Now to look around and see what can be done */ | ||
2662 | |||
2663 | /* clean-up completed biofill operations */ | ||
2664 | if (test_bit(STRIPE_OP_BIOFILL, &sh->ops.complete)) { | ||
2665 | clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending); | ||
2666 | clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); | ||
2667 | clear_bit(STRIPE_OP_BIOFILL, &sh->ops.complete); | ||
2668 | } | ||
2669 | 2533 | ||
2534 | /* Now to look around and see what can be done */ | ||
2670 | rcu_read_lock(); | 2535 | rcu_read_lock(); |
2671 | for (i=disks; i--; ) { | 2536 | for (i=disks; i--; ) { |
2672 | mdk_rdev_t *rdev; | 2537 | mdk_rdev_t *rdev; |
@@ -2680,10 +2545,10 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2680 | /* maybe we can request a biofill operation | 2545 | /* maybe we can request a biofill operation |
2681 | * | 2546 | * |
2682 | * new wantfill requests are only permitted while | 2547 | * new wantfill requests are only permitted while |
2683 | * STRIPE_OP_BIOFILL is clear | 2548 | * ops_complete_biofill is guaranteed to be inactive |
2684 | */ | 2549 | */ |
2685 | if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && | 2550 | if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && |
2686 | !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) | 2551 | !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) |
2687 | set_bit(R5_Wantfill, &dev->flags); | 2552 | set_bit(R5_Wantfill, &dev->flags); |
2688 | 2553 | ||
2689 | /* now count some things */ | 2554 | /* now count some things */ |
@@ -2727,8 +2592,10 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2727 | goto unlock; | 2592 | goto unlock; |
2728 | } | 2593 | } |
2729 | 2594 | ||
2730 | if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) | 2595 | if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { |
2731 | sh->ops.count++; | 2596 | set_bit(STRIPE_OP_BIOFILL, &s.ops_request); |
2597 | set_bit(STRIPE_BIOFILL_RUN, &sh->state); | ||
2598 | } | ||
2732 | 2599 | ||
2733 | pr_debug("locked=%d uptodate=%d to_read=%d" | 2600 | pr_debug("locked=%d uptodate=%d to_read=%d" |
2734 | " to_write=%d failed=%d failed_num=%d\n", | 2601 | " to_write=%d failed=%d failed_num=%d\n", |
@@ -2738,8 +2605,7 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2738 | * need to be failed | 2605 | * need to be failed |
2739 | */ | 2606 | */ |
2740 | if (s.failed > 1 && s.to_read+s.to_write+s.written) | 2607 | if (s.failed > 1 && s.to_read+s.to_write+s.written) |
2741 | handle_requests_to_failed_array(conf, sh, &s, disks, | 2608 | handle_failed_stripe(conf, sh, &s, disks, &return_bi); |
2742 | &return_bi); | ||
2743 | if (s.failed > 1 && s.syncing) { | 2609 | if (s.failed > 1 && s.syncing) { |
2744 | md_done_sync(conf->mddev, STRIPE_SECTORS,0); | 2610 | md_done_sync(conf->mddev, STRIPE_SECTORS,0); |
2745 | clear_bit(STRIPE_SYNCING, &sh->state); | 2611 | clear_bit(STRIPE_SYNCING, &sh->state); |
@@ -2755,48 +2621,25 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2755 | !test_bit(R5_LOCKED, &dev->flags) && | 2621 | !test_bit(R5_LOCKED, &dev->flags) && |
2756 | test_bit(R5_UPTODATE, &dev->flags)) || | 2622 | test_bit(R5_UPTODATE, &dev->flags)) || |
2757 | (s.failed == 1 && s.failed_num == sh->pd_idx))) | 2623 | (s.failed == 1 && s.failed_num == sh->pd_idx))) |
2758 | handle_completed_write_requests(conf, sh, disks, &return_bi); | 2624 | handle_stripe_clean_event(conf, sh, disks, &return_bi); |
2759 | 2625 | ||
2760 | /* Now we might consider reading some blocks, either to check/generate | 2626 | /* Now we might consider reading some blocks, either to check/generate |
2761 | * parity, or to satisfy requests | 2627 | * parity, or to satisfy requests |
2762 | * or to load a block that is being partially written. | 2628 | * or to load a block that is being partially written. |
2763 | */ | 2629 | */ |
2764 | if (s.to_read || s.non_overwrite || | 2630 | if (s.to_read || s.non_overwrite || |
2765 | (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding || | 2631 | (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) |
2766 | test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) | 2632 | handle_stripe_fill5(sh, &s, disks); |
2767 | handle_issuing_new_read_requests5(sh, &s, disks); | ||
2768 | 2633 | ||
2769 | /* Now we check to see if any write operations have recently | 2634 | /* Now we check to see if any write operations have recently |
2770 | * completed | 2635 | * completed |
2771 | */ | 2636 | */ |
2772 | |||
2773 | /* leave prexor set until postxor is done, allows us to distinguish | ||
2774 | * a rmw from a rcw during biodrain | ||
2775 | */ | ||
2776 | prexor = 0; | 2637 | prexor = 0; |
2777 | if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && | 2638 | if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) |
2778 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | ||
2779 | |||
2780 | prexor = 1; | 2639 | prexor = 1; |
2781 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); | 2640 | if (sh->reconstruct_state == reconstruct_state_drain_result || |
2782 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); | 2641 | sh->reconstruct_state == reconstruct_state_prexor_drain_result) { |
2783 | clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); | 2642 | sh->reconstruct_state = reconstruct_state_idle; |
2784 | |||
2785 | for (i = disks; i--; ) | ||
2786 | clear_bit(R5_Wantprexor, &sh->dev[i].flags); | ||
2787 | } | ||
2788 | |||
2789 | /* if only POSTXOR is set then this is an 'expand' postxor */ | ||
2790 | if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) && | ||
2791 | test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { | ||
2792 | |||
2793 | clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); | ||
2794 | clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack); | ||
2795 | clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); | ||
2796 | |||
2797 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | ||
2798 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); | ||
2799 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | ||
2800 | 2643 | ||
2801 | /* All the 'written' buffers and the parity block are ready to | 2644 | /* All the 'written' buffers and the parity block are ready to |
2802 | * be written back to disk | 2645 | * be written back to disk |
@@ -2808,9 +2651,6 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2808 | (i == sh->pd_idx || dev->written)) { | 2651 | (i == sh->pd_idx || dev->written)) { |
2809 | pr_debug("Writing block %d\n", i); | 2652 | pr_debug("Writing block %d\n", i); |
2810 | set_bit(R5_Wantwrite, &dev->flags); | 2653 | set_bit(R5_Wantwrite, &dev->flags); |
2811 | if (!test_and_set_bit( | ||
2812 | STRIPE_OP_IO, &sh->ops.pending)) | ||
2813 | sh->ops.count++; | ||
2814 | if (prexor) | 2654 | if (prexor) |
2815 | continue; | 2655 | continue; |
2816 | if (!test_bit(R5_Insync, &dev->flags) || | 2656 | if (!test_bit(R5_Insync, &dev->flags) || |
@@ -2832,20 +2672,18 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2832 | * 2/ A 'check' operation is in flight, as it may clobber the parity | 2672 | * 2/ A 'check' operation is in flight, as it may clobber the parity |
2833 | * block. | 2673 | * block. |
2834 | */ | 2674 | */ |
2835 | if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) && | 2675 | if (s.to_write && !sh->reconstruct_state && !sh->check_state) |
2836 | !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) | 2676 | handle_stripe_dirtying5(conf, sh, &s, disks); |
2837 | handle_issuing_new_write_requests5(conf, sh, &s, disks); | ||
2838 | 2677 | ||
2839 | /* maybe we need to check and possibly fix the parity for this stripe | 2678 | /* maybe we need to check and possibly fix the parity for this stripe |
2840 | * Any reads will already have been scheduled, so we just see if enough | 2679 | * Any reads will already have been scheduled, so we just see if enough |
2841 | * data is available. The parity check is held off while parity | 2680 | * data is available. The parity check is held off while parity |
2842 | * dependent operations are in flight. | 2681 | * dependent operations are in flight. |
2843 | */ | 2682 | */ |
2844 | if ((s.syncing && s.locked == 0 && | 2683 | if (sh->check_state || |
2845 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && | 2684 | (s.syncing && s.locked == 0 && |
2846 | !test_bit(STRIPE_INSYNC, &sh->state)) || | 2685 | !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && |
2847 | test_bit(STRIPE_OP_CHECK, &sh->ops.pending) || | 2686 | !test_bit(STRIPE_INSYNC, &sh->state))) |
2848 | test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) | ||
2849 | handle_parity_checks5(conf, sh, &s, disks); | 2687 | handle_parity_checks5(conf, sh, &s, disks); |
2850 | 2688 | ||
2851 | if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { | 2689 | if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { |
@@ -2864,52 +2702,35 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2864 | dev = &sh->dev[s.failed_num]; | 2702 | dev = &sh->dev[s.failed_num]; |
2865 | if (!test_bit(R5_ReWrite, &dev->flags)) { | 2703 | if (!test_bit(R5_ReWrite, &dev->flags)) { |
2866 | set_bit(R5_Wantwrite, &dev->flags); | 2704 | set_bit(R5_Wantwrite, &dev->flags); |
2867 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
2868 | sh->ops.count++; | ||
2869 | set_bit(R5_ReWrite, &dev->flags); | 2705 | set_bit(R5_ReWrite, &dev->flags); |
2870 | set_bit(R5_LOCKED, &dev->flags); | 2706 | set_bit(R5_LOCKED, &dev->flags); |
2871 | s.locked++; | 2707 | s.locked++; |
2872 | } else { | 2708 | } else { |
2873 | /* let's read it back */ | 2709 | /* let's read it back */ |
2874 | set_bit(R5_Wantread, &dev->flags); | 2710 | set_bit(R5_Wantread, &dev->flags); |
2875 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
2876 | sh->ops.count++; | ||
2877 | set_bit(R5_LOCKED, &dev->flags); | 2711 | set_bit(R5_LOCKED, &dev->flags); |
2878 | s.locked++; | 2712 | s.locked++; |
2879 | } | 2713 | } |
2880 | } | 2714 | } |
2881 | 2715 | ||
2882 | /* Finish postxor operations initiated by the expansion | 2716 | /* Finish reconstruct operations initiated by the expansion process */ |
2883 | * process | 2717 | if (sh->reconstruct_state == reconstruct_state_result) { |
2884 | */ | 2718 | sh->reconstruct_state = reconstruct_state_idle; |
2885 | if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) && | ||
2886 | !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) { | ||
2887 | |||
2888 | clear_bit(STRIPE_EXPANDING, &sh->state); | 2719 | clear_bit(STRIPE_EXPANDING, &sh->state); |
2889 | 2720 | for (i = conf->raid_disks; i--; ) | |
2890 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); | ||
2891 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); | ||
2892 | clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); | ||
2893 | |||
2894 | for (i = conf->raid_disks; i--; ) { | ||
2895 | set_bit(R5_Wantwrite, &sh->dev[i].flags); | 2721 | set_bit(R5_Wantwrite, &sh->dev[i].flags); |
2896 | set_bit(R5_LOCKED, &dev->flags); | 2722 | set_bit(R5_LOCKED, &dev->flags); |
2897 | s.locked++; | 2723 | s.locked++; |
2898 | if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) | ||
2899 | sh->ops.count++; | ||
2900 | } | ||
2901 | } | 2724 | } |
2902 | 2725 | ||
2903 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && | 2726 | if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && |
2904 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { | 2727 | !sh->reconstruct_state) { |
2905 | /* Need to write out all blocks after computing parity */ | 2728 | /* Need to write out all blocks after computing parity */ |
2906 | sh->disks = conf->raid_disks; | 2729 | sh->disks = conf->raid_disks; |
2907 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, | 2730 | sh->pd_idx = stripe_to_pdidx(sh->sector, conf, |
2908 | conf->raid_disks); | 2731 | conf->raid_disks); |
2909 | s.locked += handle_write_operations5(sh, 1, 1); | 2732 | schedule_reconstruction5(sh, &s, 1, 1); |
2910 | } else if (s.expanded && | 2733 | } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { |
2911 | s.locked == 0 && | ||
2912 | !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { | ||
2913 | clear_bit(STRIPE_EXPAND_READY, &sh->state); | 2734 | clear_bit(STRIPE_EXPAND_READY, &sh->state); |
2914 | atomic_dec(&conf->reshape_stripes); | 2735 | atomic_dec(&conf->reshape_stripes); |
2915 | wake_up(&conf->wait_for_overlap); | 2736 | wake_up(&conf->wait_for_overlap); |
@@ -2917,12 +2738,9 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2917 | } | 2738 | } |
2918 | 2739 | ||
2919 | if (s.expanding && s.locked == 0 && | 2740 | if (s.expanding && s.locked == 0 && |
2920 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) | 2741 | !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) |
2921 | handle_stripe_expansion(conf, sh, NULL); | 2742 | handle_stripe_expansion(conf, sh, NULL); |
2922 | 2743 | ||
2923 | if (sh->ops.count) | ||
2924 | pending = get_stripe_work(sh); | ||
2925 | |||
2926 | unlock: | 2744 | unlock: |
2927 | spin_unlock(&sh->lock); | 2745 | spin_unlock(&sh->lock); |
2928 | 2746 | ||
@@ -2930,11 +2748,12 @@ static void handle_stripe5(struct stripe_head *sh) | |||
2930 | if (unlikely(blocked_rdev)) | 2748 | if (unlikely(blocked_rdev)) |
2931 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | 2749 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); |
2932 | 2750 | ||
2933 | if (pending) | 2751 | if (s.ops_request) |
2934 | raid5_run_ops(sh, pending); | 2752 | raid5_run_ops(sh, s.ops_request); |
2935 | 2753 | ||
2936 | return_io(return_bi); | 2754 | ops_run_io(sh, &s); |
2937 | 2755 | ||
2756 | return_io(return_bi); | ||
2938 | } | 2757 | } |
2939 | 2758 | ||
2940 | static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | 2759 | static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) |
@@ -3042,8 +2861,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3042 | * might need to be failed | 2861 | * might need to be failed |
3043 | */ | 2862 | */ |
3044 | if (s.failed > 2 && s.to_read+s.to_write+s.written) | 2863 | if (s.failed > 2 && s.to_read+s.to_write+s.written) |
3045 | handle_requests_to_failed_array(conf, sh, &s, disks, | 2864 | handle_failed_stripe(conf, sh, &s, disks, &return_bi); |
3046 | &return_bi); | ||
3047 | if (s.failed > 2 && s.syncing) { | 2865 | if (s.failed > 2 && s.syncing) { |
3048 | md_done_sync(conf->mddev, STRIPE_SECTORS,0); | 2866 | md_done_sync(conf->mddev, STRIPE_SECTORS,0); |
3049 | clear_bit(STRIPE_SYNCING, &sh->state); | 2867 | clear_bit(STRIPE_SYNCING, &sh->state); |
@@ -3068,7 +2886,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3068 | ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) | 2886 | ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) |
3069 | && !test_bit(R5_LOCKED, &qdev->flags) | 2887 | && !test_bit(R5_LOCKED, &qdev->flags) |
3070 | && test_bit(R5_UPTODATE, &qdev->flags))))) | 2888 | && test_bit(R5_UPTODATE, &qdev->flags))))) |
3071 | handle_completed_write_requests(conf, sh, disks, &return_bi); | 2889 | handle_stripe_clean_event(conf, sh, disks, &return_bi); |
3072 | 2890 | ||
3073 | /* Now we might consider reading some blocks, either to check/generate | 2891 | /* Now we might consider reading some blocks, either to check/generate |
3074 | * parity, or to satisfy requests | 2892 | * parity, or to satisfy requests |
@@ -3076,11 +2894,11 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3076 | */ | 2894 | */ |
3077 | if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || | 2895 | if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || |
3078 | (s.syncing && (s.uptodate < disks)) || s.expanding) | 2896 | (s.syncing && (s.uptodate < disks)) || s.expanding) |
3079 | handle_issuing_new_read_requests6(sh, &s, &r6s, disks); | 2897 | handle_stripe_fill6(sh, &s, &r6s, disks); |
3080 | 2898 | ||
3081 | /* now to consider writing and what else, if anything should be read */ | 2899 | /* now to consider writing and what else, if anything should be read */ |
3082 | if (s.to_write) | 2900 | if (s.to_write) |
3083 | handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks); | 2901 | handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); |
3084 | 2902 | ||
3085 | /* maybe we need to check and possibly fix the parity for this stripe | 2903 | /* maybe we need to check and possibly fix the parity for this stripe |
3086 | * Any reads will already have been scheduled, so we just see if enough | 2904 | * Any reads will already have been scheduled, so we just see if enough |
@@ -3136,7 +2954,7 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3136 | } | 2954 | } |
3137 | 2955 | ||
3138 | if (s.expanding && s.locked == 0 && | 2956 | if (s.expanding && s.locked == 0 && |
3139 | !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) | 2957 | !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) |
3140 | handle_stripe_expansion(conf, sh, &r6s); | 2958 | handle_stripe_expansion(conf, sh, &r6s); |
3141 | 2959 | ||
3142 | unlock: | 2960 | unlock: |
@@ -3146,68 +2964,9 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) | |||
3146 | if (unlikely(blocked_rdev)) | 2964 | if (unlikely(blocked_rdev)) |
3147 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); | 2965 | md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); |
3148 | 2966 | ||
3149 | return_io(return_bi); | 2967 | ops_run_io(sh, &s); |
3150 | |||
3151 | for (i=disks; i-- ;) { | ||
3152 | int rw; | ||
3153 | struct bio *bi; | ||
3154 | mdk_rdev_t *rdev; | ||
3155 | if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) | ||
3156 | rw = WRITE; | ||
3157 | else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) | ||
3158 | rw = READ; | ||
3159 | else | ||
3160 | continue; | ||
3161 | |||
3162 | set_bit(STRIPE_IO_STARTED, &sh->state); | ||
3163 | |||
3164 | bi = &sh->dev[i].req; | ||
3165 | |||
3166 | bi->bi_rw = rw; | ||
3167 | if (rw == WRITE) | ||
3168 | bi->bi_end_io = raid5_end_write_request; | ||
3169 | else | ||
3170 | bi->bi_end_io = raid5_end_read_request; | ||
3171 | |||
3172 | rcu_read_lock(); | ||
3173 | rdev = rcu_dereference(conf->disks[i].rdev); | ||
3174 | if (rdev && test_bit(Faulty, &rdev->flags)) | ||
3175 | rdev = NULL; | ||
3176 | if (rdev) | ||
3177 | atomic_inc(&rdev->nr_pending); | ||
3178 | rcu_read_unlock(); | ||
3179 | 2968 | ||
3180 | if (rdev) { | 2969 | return_io(return_bi); |
3181 | if (s.syncing || s.expanding || s.expanded) | ||
3182 | md_sync_acct(rdev->bdev, STRIPE_SECTORS); | ||
3183 | |||
3184 | bi->bi_bdev = rdev->bdev; | ||
3185 | pr_debug("for %llu schedule op %ld on disc %d\n", | ||
3186 | (unsigned long long)sh->sector, bi->bi_rw, i); | ||
3187 | atomic_inc(&sh->count); | ||
3188 | bi->bi_sector = sh->sector + rdev->data_offset; | ||
3189 | bi->bi_flags = 1 << BIO_UPTODATE; | ||
3190 | bi->bi_vcnt = 1; | ||
3191 | bi->bi_max_vecs = 1; | ||
3192 | bi->bi_idx = 0; | ||
3193 | bi->bi_io_vec = &sh->dev[i].vec; | ||
3194 | bi->bi_io_vec[0].bv_len = STRIPE_SIZE; | ||
3195 | bi->bi_io_vec[0].bv_offset = 0; | ||
3196 | bi->bi_size = STRIPE_SIZE; | ||
3197 | bi->bi_next = NULL; | ||
3198 | if (rw == WRITE && | ||
3199 | test_bit(R5_ReWrite, &sh->dev[i].flags)) | ||
3200 | atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); | ||
3201 | generic_make_request(bi); | ||
3202 | } else { | ||
3203 | if (rw == WRITE) | ||
3204 | set_bit(STRIPE_DEGRADED, &sh->state); | ||
3205 | pr_debug("skip op %ld on disc %d for sector %llu\n", | ||
3206 | bi->bi_rw, i, (unsigned long long)sh->sector); | ||
3207 | clear_bit(R5_LOCKED, &sh->dev[i].flags); | ||
3208 | set_bit(STRIPE_HANDLE, &sh->state); | ||
3209 | } | ||
3210 | } | ||
3211 | } | 2970 | } |
3212 | 2971 | ||
3213 | static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) | 2972 | static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) |
@@ -3697,9 +3456,7 @@ static int make_request(struct request_queue *q, struct bio * bi) | |||
3697 | if ( rw == WRITE ) | 3456 | if ( rw == WRITE ) |
3698 | md_write_end(mddev); | 3457 | md_write_end(mddev); |
3699 | 3458 | ||
3700 | bi->bi_end_io(bi, | 3459 | bio_endio(bi, 0); |
3701 | test_bit(BIO_UPTODATE, &bi->bi_flags) | ||
3702 | ? 0 : -EIO); | ||
3703 | } | 3460 | } |
3704 | return 0; | 3461 | return 0; |
3705 | } | 3462 | } |
@@ -3785,7 +3542,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped | |||
3785 | j == raid6_next_disk(sh->pd_idx, sh->disks)) | 3542 | j == raid6_next_disk(sh->pd_idx, sh->disks)) |
3786 | continue; | 3543 | continue; |
3787 | s = compute_blocknr(sh, j); | 3544 | s = compute_blocknr(sh, j); |
3788 | if (s < (mddev->array_size<<1)) { | 3545 | if (s < mddev->array_sectors) { |
3789 | skipped = 1; | 3546 | skipped = 1; |
3790 | continue; | 3547 | continue; |
3791 | } | 3548 | } |
@@ -4002,12 +3759,8 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio) | |||
4002 | spin_lock_irq(&conf->device_lock); | 3759 | spin_lock_irq(&conf->device_lock); |
4003 | remaining = --raid_bio->bi_phys_segments; | 3760 | remaining = --raid_bio->bi_phys_segments; |
4004 | spin_unlock_irq(&conf->device_lock); | 3761 | spin_unlock_irq(&conf->device_lock); |
4005 | if (remaining == 0) { | 3762 | if (remaining == 0) |
4006 | 3763 | bio_endio(raid_bio, 0); | |
4007 | raid_bio->bi_end_io(raid_bio, | ||
4008 | test_bit(BIO_UPTODATE, &raid_bio->bi_flags) | ||
4009 | ? 0 : -EIO); | ||
4010 | } | ||
4011 | if (atomic_dec_and_test(&conf->active_aligned_reads)) | 3764 | if (atomic_dec_and_test(&conf->active_aligned_reads)) |
4012 | wake_up(&conf->wait_for_stripe); | 3765 | wake_up(&conf->wait_for_stripe); |
4013 | return handled; | 3766 | return handled; |
@@ -4094,6 +3847,8 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | |||
4094 | { | 3847 | { |
4095 | raid5_conf_t *conf = mddev_to_conf(mddev); | 3848 | raid5_conf_t *conf = mddev_to_conf(mddev); |
4096 | unsigned long new; | 3849 | unsigned long new; |
3850 | int err; | ||
3851 | |||
4097 | if (len >= PAGE_SIZE) | 3852 | if (len >= PAGE_SIZE) |
4098 | return -EINVAL; | 3853 | return -EINVAL; |
4099 | if (!conf) | 3854 | if (!conf) |
@@ -4109,7 +3864,9 @@ raid5_store_stripe_cache_size(mddev_t *mddev, const char *page, size_t len) | |||
4109 | else | 3864 | else |
4110 | break; | 3865 | break; |
4111 | } | 3866 | } |
4112 | md_allow_write(mddev); | 3867 | err = md_allow_write(mddev); |
3868 | if (err) | ||
3869 | return err; | ||
4113 | while (new > conf->max_nr_stripes) { | 3870 | while (new > conf->max_nr_stripes) { |
4114 | if (grow_one_stripe(conf)) | 3871 | if (grow_one_stripe(conf)) |
4115 | conf->max_nr_stripes++; | 3872 | conf->max_nr_stripes++; |
@@ -4434,7 +4191,7 @@ static int run(mddev_t *mddev) | |||
4434 | mddev->queue->backing_dev_info.congested_data = mddev; | 4191 | mddev->queue->backing_dev_info.congested_data = mddev; |
4435 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; | 4192 | mddev->queue->backing_dev_info.congested_fn = raid5_congested; |
4436 | 4193 | ||
4437 | mddev->array_size = mddev->size * (conf->previous_raid_disks - | 4194 | mddev->array_sectors = 2 * mddev->size * (conf->previous_raid_disks - |
4438 | conf->max_degraded); | 4195 | conf->max_degraded); |
4439 | 4196 | ||
4440 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); | 4197 | blk_queue_merge_bvec(mddev->queue, raid5_mergeable_bvec); |
@@ -4609,35 +4366,41 @@ abort: | |||
4609 | static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) | 4366 | static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) |
4610 | { | 4367 | { |
4611 | raid5_conf_t *conf = mddev->private; | 4368 | raid5_conf_t *conf = mddev->private; |
4612 | int found = 0; | 4369 | int err = -EEXIST; |
4613 | int disk; | 4370 | int disk; |
4614 | struct disk_info *p; | 4371 | struct disk_info *p; |
4372 | int first = 0; | ||
4373 | int last = conf->raid_disks - 1; | ||
4615 | 4374 | ||
4616 | if (mddev->degraded > conf->max_degraded) | 4375 | if (mddev->degraded > conf->max_degraded) |
4617 | /* no point adding a device */ | 4376 | /* no point adding a device */ |
4618 | return 0; | 4377 | return -EINVAL; |
4378 | |||
4379 | if (rdev->raid_disk >= 0) | ||
4380 | first = last = rdev->raid_disk; | ||
4619 | 4381 | ||
4620 | /* | 4382 | /* |
4621 | * find the disk ... but prefer rdev->saved_raid_disk | 4383 | * find the disk ... but prefer rdev->saved_raid_disk |
4622 | * if possible. | 4384 | * if possible. |
4623 | */ | 4385 | */ |
4624 | if (rdev->saved_raid_disk >= 0 && | 4386 | if (rdev->saved_raid_disk >= 0 && |
4387 | rdev->saved_raid_disk >= first && | ||
4625 | conf->disks[rdev->saved_raid_disk].rdev == NULL) | 4388 | conf->disks[rdev->saved_raid_disk].rdev == NULL) |
4626 | disk = rdev->saved_raid_disk; | 4389 | disk = rdev->saved_raid_disk; |
4627 | else | 4390 | else |
4628 | disk = 0; | 4391 | disk = first; |
4629 | for ( ; disk < conf->raid_disks; disk++) | 4392 | for ( ; disk <= last ; disk++) |
4630 | if ((p=conf->disks + disk)->rdev == NULL) { | 4393 | if ((p=conf->disks + disk)->rdev == NULL) { |
4631 | clear_bit(In_sync, &rdev->flags); | 4394 | clear_bit(In_sync, &rdev->flags); |
4632 | rdev->raid_disk = disk; | 4395 | rdev->raid_disk = disk; |
4633 | found = 1; | 4396 | err = 0; |
4634 | if (rdev->saved_raid_disk != disk) | 4397 | if (rdev->saved_raid_disk != disk) |
4635 | conf->fullsync = 1; | 4398 | conf->fullsync = 1; |
4636 | rcu_assign_pointer(p->rdev, rdev); | 4399 | rcu_assign_pointer(p->rdev, rdev); |
4637 | break; | 4400 | break; |
4638 | } | 4401 | } |
4639 | print_raid5_conf(conf); | 4402 | print_raid5_conf(conf); |
4640 | return found; | 4403 | return err; |
4641 | } | 4404 | } |
4642 | 4405 | ||
4643 | static int raid5_resize(mddev_t *mddev, sector_t sectors) | 4406 | static int raid5_resize(mddev_t *mddev, sector_t sectors) |
@@ -4652,8 +4415,9 @@ static int raid5_resize(mddev_t *mddev, sector_t sectors) | |||
4652 | raid5_conf_t *conf = mddev_to_conf(mddev); | 4415 | raid5_conf_t *conf = mddev_to_conf(mddev); |
4653 | 4416 | ||
4654 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); | 4417 | sectors &= ~((sector_t)mddev->chunk_size/512 - 1); |
4655 | mddev->array_size = (sectors * (mddev->raid_disks-conf->max_degraded))>>1; | 4418 | mddev->array_sectors = sectors * (mddev->raid_disks |
4656 | set_capacity(mddev->gendisk, mddev->array_size << 1); | 4419 | - conf->max_degraded); |
4420 | set_capacity(mddev->gendisk, mddev->array_sectors); | ||
4657 | mddev->changed = 1; | 4421 | mddev->changed = 1; |
4658 | if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { | 4422 | if (sectors/2 > mddev->size && mddev->recovery_cp == MaxSector) { |
4659 | mddev->recovery_cp = mddev->size << 1; | 4423 | mddev->recovery_cp = mddev->size << 1; |
@@ -4738,7 +4502,7 @@ static int raid5_start_reshape(mddev_t *mddev) | |||
4738 | rdev_for_each(rdev, rtmp, mddev) | 4502 | rdev_for_each(rdev, rtmp, mddev) |
4739 | if (rdev->raid_disk < 0 && | 4503 | if (rdev->raid_disk < 0 && |
4740 | !test_bit(Faulty, &rdev->flags)) { | 4504 | !test_bit(Faulty, &rdev->flags)) { |
4741 | if (raid5_add_disk(mddev, rdev)) { | 4505 | if (raid5_add_disk(mddev, rdev) == 0) { |
4742 | char nm[20]; | 4506 | char nm[20]; |
4743 | set_bit(In_sync, &rdev->flags); | 4507 | set_bit(In_sync, &rdev->flags); |
4744 | added_devices++; | 4508 | added_devices++; |
@@ -4786,15 +4550,16 @@ static void end_reshape(raid5_conf_t *conf) | |||
4786 | struct block_device *bdev; | 4550 | struct block_device *bdev; |
4787 | 4551 | ||
4788 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { | 4552 | if (!test_bit(MD_RECOVERY_INTR, &conf->mddev->recovery)) { |
4789 | conf->mddev->array_size = conf->mddev->size * | 4553 | conf->mddev->array_sectors = 2 * conf->mddev->size * |
4790 | (conf->raid_disks - conf->max_degraded); | 4554 | (conf->raid_disks - conf->max_degraded); |
4791 | set_capacity(conf->mddev->gendisk, conf->mddev->array_size << 1); | 4555 | set_capacity(conf->mddev->gendisk, conf->mddev->array_sectors); |
4792 | conf->mddev->changed = 1; | 4556 | conf->mddev->changed = 1; |
4793 | 4557 | ||
4794 | bdev = bdget_disk(conf->mddev->gendisk, 0); | 4558 | bdev = bdget_disk(conf->mddev->gendisk, 0); |
4795 | if (bdev) { | 4559 | if (bdev) { |
4796 | mutex_lock(&bdev->bd_inode->i_mutex); | 4560 | mutex_lock(&bdev->bd_inode->i_mutex); |
4797 | i_size_write(bdev->bd_inode, (loff_t)conf->mddev->array_size << 10); | 4561 | i_size_write(bdev->bd_inode, |
4562 | (loff_t)conf->mddev->array_sectors << 9); | ||
4798 | mutex_unlock(&bdev->bd_inode->i_mutex); | 4563 | mutex_unlock(&bdev->bd_inode->i_mutex); |
4799 | bdput(bdev); | 4564 | bdput(bdev); |
4800 | } | 4565 | } |