1 files changed, 61 insertions, 33 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..3b27df52456b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@
 #define __inline__
 #endif
+#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) /* printk only when printk_ratelimit() is non-zero; result discarded */
 #if !RAID6_USE_EMPTY_ZERO_PAGE
 /* In .bss so it's zeroed */
 const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
                set_bit(R5_UPTODATE, &sh->dev[i].flags);
                if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
                        rdev = conf->disks[i].rdev;
-                        printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
+                        printk_rl(KERN_INFO "raid5:%s: read error corrected"
-                               mdname(conf->mddev), STRIPE_SECTORS,
+                                  " (%lu sectors at %llu on %s)\n",
-                               (unsigned long long)(sh->sector + rdev->data_offset),
+                                  mdname(conf->mddev), STRIPE_SECTORS,
-                               bdevname(rdev->bdev, b));
+                                  (unsigned long long)(sh->sector
+                                                       + rdev->data_offset),
+                                  bdevname(rdev->bdev, b));
                        clear_bit(R5_ReadError, &sh->dev[i].flags);
                        clear_bit(R5_ReWrite, &sh->dev[i].flags);
                }
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error)
                clear_bit(R5_UPTODATE, &sh->dev[i].flags);
                atomic_inc(&rdev->read_errors);
                if (conf->mddev->degraded)
-                        printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
+                        printk_rl(KERN_WARNING
-                               mdname(conf->mddev),
+                                  "raid5:%s: read error not correctable "
-                               (unsigned long long)(sh->sector + rdev->data_offset),
+                                  "(sector %llu on %s).\n",
-                               bdn);
+                                  mdname(conf->mddev),
+                                  (unsigned long long)(sh->sector
+                                                       + rdev->data_offset),
+                                  bdn);
                else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
                        /* Oh, no!!! */
-                        printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
+                        printk_rl(KERN_WARNING
-                               mdname(conf->mddev),
+                                  "raid5:%s: read error NOT corrected!! "
-                               (unsigned long long)(sh->sector + rdev->data_offset),
+                                  "(sector %llu on %s).\n",
-                               bdn);
+                                  mdname(conf->mddev),
+                                  (unsigned long long)(sh->sector
+                                                       + rdev->data_offset),
+                                  bdn);
                else if (atomic_read(&rdev->read_errors)
                         > conf->max_nr_stripes)
                        printk(KERN_WARNING
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
                        /*
                         * if recovery was running, make sure it aborts.
                         */
-                        set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+                        set_bit(MD_RECOVERY_INTR, &mddev->recovery);
                }
                set_bit(Faulty, &rdev->flags);
                printk (KERN_ALERT
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
                 * have quiesced.
                 */
                if ((s->uptodate == disks - 1) &&
+                    (s->failed && disk_idx == s->failed_num) &&
                    !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
                        set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
                        set_bit(R5_Wantcompute, &dev->flags);
@@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
                         */
                        s->uptodate++;
                        return 0; /* uptodate + compute == disks */
-                } else if ((s->uptodate < disks - 1) &&
+                } else if (test_bit(R5_Insync, &dev->flags)) {
-                        test_bit(R5_Insync, &dev->flags)) {
-                        /* Note: we hold off compute operations while checks are
-                         * in flight, but we still prefer 'compute' over 'read'
-                         * hence we only read if (uptodate < * disks-1)
-                         */
                        set_bit(R5_LOCKED, &dev->flags);
                        set_bit(R5_Wantread, &dev->flags);
                        if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
@@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh,
                        /* we would like to get this block, possibly
                         * by computing it, but we might not be able to
                         */
-                        if (s->uptodate == disks-1) {
+                        if ((s->uptodate == disks - 1) &&
+                            (s->failed && (i == r6s->failed_num[0] ||
+                                           i == r6s->failed_num[1]))) {
                                pr_debug("Computing stripe %llu block %d\n",
                                       (unsigned long long)sh->sector, i);
                                compute_block_1(sh, i, 0);
@@ -2369,8 +2377,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
        /* complete a check operation */
        if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-            clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+                clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-            clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+                clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
                if (s->failed == 0) {
                        if (sh->ops.zero_sum_result == 0)
                                /* parity is correct (on disc,
@@ -2400,16 +2408,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                        canceled_check = 1; /* STRIPE_INSYNC is not set */
        }
-        /* check if we can clear a parity disk reconstruct */
-        if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
-                test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-                clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
-                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
-                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
-                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
-        }
        /* start a new check operation if there are no failures, the stripe is
         * not insync, and a repair is not in flight
         */
@@ -2424,6 +2422,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
                }
        }
+        /* check if we can clear a parity disk reconstruct */
+        if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
+            test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+                clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
+                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+                clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+        }
        /* Wait for check parity and compute block operations to complete
         * before write-back.  If a failure occurred while the check operation
         * was in flight we need to cycle this stripe through handle_stripe
@@ -2634,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh)
        struct r5dev *dev;
        unsigned long pending = 0;
        mdk_rdev_t *blocked_rdev = NULL;
+        int prexor;
        memset(&s, 0, sizeof(s));
        pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2763,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh)
        /* leave prexor set until postxor is done, allows us to distinguish
         * a rmw from a rcw during biodrain
         */
+        prexor = 0;
        if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
                test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+                prexor = 1;
                clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
                clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
                clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2799,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh)
                                if (!test_and_set_bit(
                                    STRIPE_OP_IO, &sh->ops.pending))
                                        sh->ops.count++;
+                                if (prexor)
+                                        continue;
                                if (!test_bit(R5_Insync, &dev->flags) ||
                                    (i == sh->pd_idx && s.failed == 0))
                                        set_bit(STRIPE_INSYNC, &sh->state);
@@ -2879,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh)
                for (i = conf->raid_disks; i--; ) {
                        set_bit(R5_Wantwrite, &sh->dev[i].flags);
+                        set_bit(R5_LOCKED, &sh->dev[i].flags);
+                        s.locked++;
                        if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
                                sh->ops.count++;
                }
@@ -2892,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh)
                        conf->raid_disks);
                s.locked += handle_write_operations5(sh, 1, 1);
        } else if (s.expanded &&
+                   s.locked == 0 &&
                !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
                clear_bit(STRIPE_EXPAND_READY, &sh->state);
                atomic_dec(&conf->reshape_stripes);
@@ -4256,6 +4273,7 @@ static int run(mddev_t *mddev)
                        goto abort;
        }
        spin_lock_init(&conf->device_lock);
+        mddev->queue->queue_lock = &conf->device_lock;
        init_waitqueue_head(&conf->wait_for_stripe);
        init_waitqueue_head(&conf->wait_for_overlap);
        INIT_LIST_HEAD(&conf->handle_list);
@@ -4285,7 +4303,9 @@ static int run(mddev_t *mddev)
                                " disk %d\n", bdevname(rdev->bdev,b),
                                raid_disk);
                        working_disks++;
-                }
+                } else
+                        /* Cannot rely on bitmap to complete recovery */
+                        conf->fullsync = 1;
        }
        /*
@@ -4562,6 +4582,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
                        err = -EBUSY;
                        goto abort;
                }
+                /* Only remove non-faulty devices if recovery
+                 * isn't possible.
+                 */
+                if (!test_bit(Faulty, &rdev->flags) &&
+                    mddev->degraded <= conf->max_degraded) {
+                        err = -EBUSY;
+                        goto abort;
+                }
                p->rdev = NULL;
                synchronize_rcu();
                if (atomic_read(&rdev->nr_pending)) {