Diffstat (limited to 'drivers/md')
 -rw-r--r--   drivers/md/dm-crypt.c   58
 -rw-r--r--   drivers/md/dm-io.c       2
 -rw-r--r--   drivers/md/dm-raid1.c    4
 -rw-r--r--   drivers/md/dm-snap.c     2
 -rw-r--r--   drivers/md/kcopyd.c     10
 -rw-r--r--   drivers/md/kcopyd.h      4
 -rw-r--r--   drivers/md/raid5.c      51
7 files changed, 70 insertions, 61 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index b04f98df94ea..835def11419d 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1,7 +1,7 @@
 /*
  * Copyright (C) 2003 Christophe Saout <christophe@saout.de>
  * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org>
- * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved.
+ * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
  *
  * This file is released under the GPL.
  */
@@ -93,6 +93,8 @@ struct crypt_config {
 
 	struct workqueue_struct *io_queue;
 	struct workqueue_struct *crypt_queue;
+	wait_queue_head_t writeq;
+
 	/*
 	 * crypto related data
 	 */
@@ -331,14 +333,7 @@ static void crypt_convert_init(struct crypt_config *cc,
 	ctx->idx_out = bio_out ? bio_out->bi_idx : 0;
 	ctx->sector = sector + cc->iv_offset;
 	init_completion(&ctx->restart);
-	/*
-	 * Crypto operation can be asynchronous,
-	 * ctx->pending is increased after request submission.
-	 * We need to ensure that we don't call the crypt finish
-	 * operation before pending got incremented
-	 * (dependent on crypt submission return code).
-	 */
-	atomic_set(&ctx->pending, 2);
+	atomic_set(&ctx->pending, 1);
 }
 
 static int crypt_convert_block(struct crypt_config *cc,
@@ -411,43 +406,42 @@ static void crypt_alloc_req(struct crypt_config *cc,
 static int crypt_convert(struct crypt_config *cc,
 			 struct convert_context *ctx)
 {
-	int r = 0;
+	int r;
 
 	while(ctx->idx_in < ctx->bio_in->bi_vcnt &&
 	      ctx->idx_out < ctx->bio_out->bi_vcnt) {
 
 		crypt_alloc_req(cc, ctx);
 
+		atomic_inc(&ctx->pending);
+
 		r = crypt_convert_block(cc, ctx, cc->req);
 
 		switch (r) {
+		/* async */
 		case -EBUSY:
 			wait_for_completion(&ctx->restart);
 			INIT_COMPLETION(ctx->restart);
 			/* fall through*/
 		case -EINPROGRESS:
-			atomic_inc(&ctx->pending);
 			cc->req = NULL;
-			r = 0;
-			/* fall through*/
+			ctx->sector++;
+			continue;
+
+		/* sync */
 		case 0:
+			atomic_dec(&ctx->pending);
 			ctx->sector++;
 			continue;
-		}
 
-		break;
+		/* error */
+		default:
+			atomic_dec(&ctx->pending);
+			return r;
+		}
 	}
 
-	/*
-	 * If there are pending crypto operation run async
-	 * code. Otherwise process return code synchronously.
-	 * The step of 2 ensures that async finish doesn't
-	 * call crypto finish too early.
-	 */
-	if (atomic_sub_return(2, &ctx->pending))
-		return -EINPROGRESS;
-
-	return r;
+	return 0;
 }
 
 static void dm_crypt_bio_destructor(struct bio *bio)
@@ -624,8 +618,10 @@ static void kcryptd_io_read(struct dm_crypt_io *io)
 static void kcryptd_io_write(struct dm_crypt_io *io)
 {
 	struct bio *clone = io->ctx.bio_out;
+	struct crypt_config *cc = io->target->private;
 
 	generic_make_request(clone);
+	wake_up(&cc->writeq);
 }
 
 static void kcryptd_io(struct work_struct *work)
@@ -698,7 +694,8 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
 
 		r = crypt_convert(cc, &io->ctx);
 
-		if (r != -EINPROGRESS) {
+		if (atomic_dec_and_test(&io->ctx.pending)) {
+			/* processed, no running async crypto */
 			kcryptd_crypt_write_io_submit(io, r, 0);
 			if (unlikely(r < 0))
 				return;
@@ -706,8 +703,12 @@ static void kcryptd_crypt_write_convert_loop(struct dm_crypt_io *io)
 		atomic_inc(&io->pending);
 
 		/* out of memory -> run queues */
-		if (unlikely(remaining))
+		if (unlikely(remaining)) {
+			/* wait for async crypto then reinitialize pending */
+			wait_event(cc->writeq, !atomic_read(&io->ctx.pending));
+			atomic_set(&io->ctx.pending, 1);
 			congestion_wait(WRITE, HZ/100);
+		}
 	}
 }
 
@@ -746,7 +747,7 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
 
 	r = crypt_convert(cc, &io->ctx);
 
-	if (r != -EINPROGRESS)
+	if (atomic_dec_and_test(&io->ctx.pending))
 		kcryptd_crypt_read_done(io, r);
 
 	crypt_dec_pending(io);
@@ -1047,6 +1048,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
 		goto bad_crypt_queue;
 	}
 
+	init_waitqueue_head(&cc->writeq);
 	ti->private = cc;
 	return 0;
 
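The crypt_convert() rewrite above replaces the old "initialize pending to 2, subtract 2 at the end" trick with a conventional biased reference count: ctx->pending starts at 1 (the submitter's own reference), each block takes a reference before crypt_convert_block(), synchronous and failed blocks drop theirs immediately, and the callers (kcryptd_crypt_write_convert_loop(), kcryptd_crypt_read_convert()) drop the bias with atomic_dec_and_test(), so whichever side reaches zero last — submitter or async completion — runs the finish path. A minimal userspace sketch of that pattern, with hypothetical demo_* names and C11 atomics standing in for the kernel's atomic_t:

/*
 * Sketch of the biased-refcount completion accounting adopted above.
 * All names are illustrative; no dm-crypt code is reproduced here.
 */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending;

/* Drop one reference; whoever reaches zero runs the completion. */
static void demo_put(void)
{
	if (atomic_fetch_sub(&pending, 1) == 1)
		printf("all blocks done, run completion\n");
}

static void demo_submit(int nblocks)
{
	/* The bias: start at 1 so async completions racing with
	 * submission can never drive the count to zero early. */
	atomic_store(&pending, 1);

	for (int i = 0; i < nblocks; i++) {
		atomic_fetch_add(&pending, 1);	/* reference for this block */

		/* submit block i here; an async engine would call
		 * demo_put() from its callback later instead */
		int completed_synchronously = 1;	/* sketch only */

		if (completed_synchronously)
			demo_put();
	}

	demo_put();	/* drop the submitter's bias reference */
}

int main(void)
{
	demo_submit(4);
	return 0;
}

The bias guarantees the count cannot hit zero while submission is still in progress, which is exactly the race the deleted comment in crypt_convert_init() was worried about.
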
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index b8e342fe7586..8f25f628ef16 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -114,7 +114,7 @@ static void dec_count(struct io *io, unsigned int region, int error)
 		wake_up_process(io->sleeper);
 
 	else {
-		int r = io->error;
+		unsigned long r = io->error;
 		io_notify_fn fn = io->callback;
 		void *context = io->context;
 
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 51605870f898..762cb086bb7f 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -753,7 +753,7 @@ out:
  * are in the no-sync state. We have to recover these by
  * recopying from the default mirror to all the others.
  *---------------------------------------------------------------*/
-static void recovery_complete(int read_err, unsigned int write_err,
+static void recovery_complete(int read_err, unsigned long write_err,
 			      void *context)
 {
 	struct region *reg = (struct region *)context;
@@ -767,7 +767,7 @@ static void recovery_complete(int read_err, unsigned int write_err,
 	}
 
 	if (write_err) {
-		DMERR_LIMIT("Write error during recovery (error = 0x%x)",
+		DMERR_LIMIT("Write error during recovery (error = 0x%lx)",
 			    write_err);
 		/*
 		 * Bits correspond to devices (excluding default mirror).
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index ae24eab8cd81..4dc8a43c034b 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -804,7 +804,7 @@ static void commit_callback(void *context, int success)
  * Called when the copy I/O has finished. kcopyd actually runs
  * this code so don't block.
  */
-static void copy_callback(int read_err, unsigned int write_err, void *context)
+static void copy_callback(int read_err, unsigned long write_err, void *context)
 {
 	struct dm_snap_pending_exception *pe = context;
 	struct dm_snapshot *s = pe->snap;
diff --git a/drivers/md/kcopyd.c b/drivers/md/kcopyd.c
index f3831f31223e..e76b52ade690 100644
--- a/drivers/md/kcopyd.c
+++ b/drivers/md/kcopyd.c
@@ -169,7 +169,7 @@ struct kcopyd_job {
 	 * Error state of the job.
 	 */
 	int read_err;
-	unsigned int write_err;
+	unsigned long write_err;
 
 	/*
 	 * Either READ or WRITE
@@ -293,7 +293,7 @@ static int run_complete_job(struct kcopyd_job *job)
 {
 	void *context = job->context;
 	int read_err = job->read_err;
-	unsigned int write_err = job->write_err;
+	unsigned long write_err = job->write_err;
 	kcopyd_notify_fn fn = job->fn;
 	struct kcopyd_client *kc = job->kc;
 
@@ -396,7 +396,7 @@ static int process_jobs(struct list_head *jobs, int (*fn) (struct kcopyd_job *))
 		if (r < 0) {
 			/* error this rogue job */
 			if (job->rw == WRITE)
-				job->write_err = (unsigned int) -1;
+				job->write_err = (unsigned long) -1L;
 			else
 				job->read_err = 1;
 			push(&_complete_jobs, job);
@@ -448,8 +448,8 @@ static void dispatch_job(struct kcopyd_job *job)
 }
 
 #define SUB_JOB_SIZE 128
-static void segment_complete(int read_err,
-			     unsigned int write_err, void *context)
+static void segment_complete(int read_err, unsigned long write_err,
+			     void *context)
 {
 	/* FIXME: tidy this function */
 	sector_t progress = 0;
diff --git a/drivers/md/kcopyd.h b/drivers/md/kcopyd.h
index 4621ea055c0e..4845f2a0c676 100644
--- a/drivers/md/kcopyd.h
+++ b/drivers/md/kcopyd.h
@@ -32,8 +32,8 @@ void kcopyd_client_destroy(struct kcopyd_client *kc);
  * read_err is a boolean,
  * write_err is a bitset, with 1 bit for each destination region
  */
-typedef void (*kcopyd_notify_fn)(int read_err,
-				 unsigned int write_err, void *context);
+typedef void (*kcopyd_notify_fn)(int read_err, unsigned long write_err,
+				 void *context);
 
 int kcopyd_copy(struct kcopyd_client *kc, struct io_region *from,
 		unsigned int num_dests, struct io_region *dests,
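The dm-io, dm-raid1, dm-snap and kcopyd hunks above are one coordinated type widening: write_err is documented as "a bitset, with 1 bit for each destination region", it travels from dm-io's dec_count() through kcopyd's kcopyd_notify_fn to the mirror and snapshot callbacks, and the kernel bitops that populate such bitsets operate on unsigned long words, so every link in the chain must carry that type (on 64-bit builds this also lifts the destination limit from 32 to BITS_PER_LONG). A small userspace sketch of the bitset idea — set_bit_ul()/test_bit_ul() are hypothetical stand-ins for the kernel's set_bit()/test_bit():

/* Why write_err wants to be unsigned long: bitops work on longs. */
#include <stdio.h>

#define BITS_PER_LONG (8 * sizeof(unsigned long))

static void set_bit_ul(unsigned int nr, unsigned long *addr)
{
	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
}

static int test_bit_ul(unsigned int nr, const unsigned long *addr)
{
	return (addr[nr / BITS_PER_LONG] >> (nr % BITS_PER_LONG)) & 1;
}

int main(void)
{
	unsigned long write_err = 0;

	/* pretend destinations 1 and 3 failed */
	set_bit_ul(1, &write_err);
	set_bit_ul(3, &write_err);

	for (unsigned int dest = 0; dest < 4; dest++)
		printf("dest %u: %s\n", dest,
		       test_bit_ul(dest, &write_err) ? "FAILED" : "ok");

	/* process_jobs()'s "(unsigned long) -1L" marks every bit */
	write_err = (unsigned long) -1L;
	printf("rogue job: 0x%lx\n", write_err);
	return 0;
}
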
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index c574cf5efb5c..b162b839a662 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2348,25 +2348,15 @@ static void handle_issuing_new_write_requests6(raid5_conf_t *conf,
 static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 				struct stripe_head_state *s, int disks)
 {
+	int canceled_check = 0;
+
 	set_bit(STRIPE_HANDLE, &sh->state);
-	/* Take one of the following actions:
-	 * 1/ start a check parity operation if (uptodate == disks)
-	 * 2/ finish a check parity operation and act on the result
-	 * 3/ skip to the writeback section if we previously
-	 *    initiated a recovery operation
-	 */
-	if (s->failed == 0 &&
-	    !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-		if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
-			BUG_ON(s->uptodate != disks);
-			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
-			sh->ops.count++;
-			s->uptodate--;
-		} else if (
-			test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
-			clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
-			clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
 
+	/* complete a check operation */
+	if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
+		clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+		clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+		if (s->failed == 0) {
 			if (sh->ops.zero_sum_result == 0)
 				/* parity is correct (on disc,
 				 * not in buffer any more)
@@ -2391,7 +2381,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 				s->uptodate++;
 			}
 		}
-	}
+		} else
+			canceled_check = 1; /* STRIPE_INSYNC is not set */
 	}
 
 	/* check if we can clear a parity disk reconstruct */
@@ -2404,12 +2395,28 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
 		clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
 	}
 
+	/* start a new check operation if there are no failures, the stripe is
+	 * not insync, and a repair is not in flight
+	 */
+	if (s->failed == 0 &&
+	    !test_bit(STRIPE_INSYNC, &sh->state) &&
+	    !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+		if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
+			BUG_ON(s->uptodate != disks);
+			clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
+			sh->ops.count++;
+			s->uptodate--;
+		}
+	}
+
 	/* Wait for check parity and compute block operations to complete
-	 * before write-back
+	 * before write-back. If a failure occurred while the check operation
+	 * was in flight we need to cycle this stripe through handle_stripe
+	 * since the parity block may not be uptodate
 	 */
-	if (!test_bit(STRIPE_INSYNC, &sh->state) &&
+	if (!canceled_check && !test_bit(STRIPE_INSYNC, &sh->state) &&
 	    !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) &&
 	    !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) {
 		struct r5dev *dev;
 		/* either failed parity check, or recovery is happening */
 		if (s->failed == 0)
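Taken together, the raid5 hunks reorder handle_parity_checks5() into three steps per pass: first reap a completed parity check (cancelling its result if a member failed while it was in flight), then start a fresh check only when the stripe has no failures, is not yet insync, and no repair is pending, and finally gate write-back on !canceled_check so a stripe whose parity block may not be uptodate is cycled back through handle_stripe(). A rough, purely illustrative control-flow sketch — demo_* names and booleans stand in for the STRIPE_* state and op bits, no real raid5 state is modelled:

/* Control-flow sketch of the reordered handle_parity_checks5(). */
#include <stdbool.h>
#include <stdio.h>

struct demo_stripe {
	bool check_complete;	/* a parity check just finished */
	bool check_pending;	/* a parity check is in flight */
	bool insync;		/* parity known good */
	int failed;		/* failed members seen this pass */
};

static void demo_handle_checks(struct demo_stripe *sh)
{
	bool canceled_check = false;

	/* 1) Complete a finished check first. If a member failed while
	 * it ran, its result is unusable: cancel instead of acting. */
	if (sh->check_complete) {
		sh->check_complete = sh->check_pending = false;
		if (sh->failed == 0)
			sh->insync = true;	/* act on zero_sum_result */
		else
			canceled_check = true;	/* insync stays unset */
	}

	/* 2) Start a new check only with no failures, the stripe not
	 * insync, and no repair in flight (mirrors the third hunk). */
	if (sh->failed == 0 && !sh->insync && !sh->check_pending)
		sh->check_pending = true;

	/* 3) Write back only if nothing was canceled; otherwise the
	 * stripe must cycle through handle_stripe() again. */
	if (!canceled_check && !sh->insync && !sh->check_pending)
		printf("proceed to write-back\n");
	else
		printf("skip write-back this pass\n");
}

int main(void)
{
	struct demo_stripe sh = { .check_complete = true, .failed = 1 };
	demo_handle_checks(&sh);	/* prints "skip write-back this pass" */
	return 0;
}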